From 44cb9ad2630c335b38ef76d1f57d4d13dc8dfc7c Mon Sep 17 00:00:00 2001 From: "(David) Siu-Kei Muk" Date: Tue, 26 Dec 2017 18:35:34 +0800 Subject: [PATCH 0001/1357] adding ps_strategy to run_config to enable different placement strategy in estimator --- tensorflow/python/estimator/estimator.py | 3 ++- tensorflow/python/estimator/run_config.py | 23 +++++++++++++++--- .../python/estimator/run_config_test.py | 24 +++++++++++++++---- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 63103ef4c1..196c9e7d56 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -938,7 +938,8 @@ def _get_replica_device_setter(config): worker_device=worker_device, merge_devices=True, ps_ops=ps_ops, - cluster=config.cluster_spec) + cluster=config.cluster_spec, + ps_strategy=config.ps_strategy) else: return None diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 294a1caff3..9aba7beeee 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -27,9 +27,11 @@ import six from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.estimator import util _USE_DEFAULT = object() +_VALID_PS_STRATEGY_ARGS = set(['op']) # A list of the property names in RunConfig that the user is allowed to change. 
_DEFAULT_REPLACEABLE_LIST = [ @@ -41,7 +43,8 @@ _DEFAULT_REPLACEABLE_LIST = [ 'session_config', 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', - 'log_step_count_steps' + 'log_step_count_steps', + 'ps_strategy' ] _SAVE_CKPT_ERR = ( @@ -248,6 +251,10 @@ def _validate_properties(run_config): _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types), message='tf_random_seed must be integer.') + _validate('ps_strategy', lambda ps_strategy: six.callable(ps_strategy) and + set(util.fn_args(ps_strategy)) == set(['op']), + message='ps_strategy must be callable with exactly one argument "op".') + class TaskType(object): MASTER = 'master' @@ -269,7 +276,8 @@ class RunConfig(object): session_config=None, keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, - log_step_count_steps=100): + log_step_count_steps=100, + ps_strategy=None): """Constructs a RunConfig. All distributed training related properties `cluster_spec`, `is_chief`, @@ -392,6 +400,10 @@ class RunConfig(object): the feature. log_step_count_steps: The frequency, in number of global steps, that the global step/sec will be logged during training. + ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by + `ps_ops`), that takes the `Operation` and returns the ps task index to + use. If `None`, defaults to a round-robin strategy across all `ps` + devices. 
Raises: @@ -427,7 +439,8 @@ class RunConfig(object): session_config=session_config, keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, - log_step_count_steps=log_step_count_steps) + log_step_count_steps=log_step_count_steps, + ps_strategy=ps_strategy) self._init_distributed_setting_from_environment_var(tf_config) @@ -536,6 +549,10 @@ class RunConfig(object): def num_worker_replicas(self): return self._num_worker_replicas + @property + def ps_strategy(self): + return self._ps_strategy + @property def task_id(self): return self._task_id diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py index 9b7af60ff2..7277e5f1ac 100644 --- a/tensorflow/python/estimator/run_config_test.py +++ b/tensorflow/python/estimator/run_config_test.py @@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto' _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' +_PS_STRATEGY_ERR = 'ps_strategy must be callable with exactly one argument "op"' _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.' _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.' 
_INVALID_TASK_TYPE_FOR_EVAL_MASTER = ( @@ -83,6 +84,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(5, config.keep_checkpoint_max) self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) + self.assertIsNone(config.ps_strategy) def test_model_dir(self): empty_config = run_config_lib.RunConfig() @@ -93,6 +95,7 @@ class RunConfigTest(test.TestCase): def test_replace_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + ps_strategy = lambda op: 0 config = run_config_lib.RunConfig().replace( tf_random_seed=11, @@ -100,13 +103,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + ps_strategy=ps_strategy) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(ps_strategy, config.ps_strategy) def test_replace_none_value(self): config = run_config_lib.RunConfig().replace( @@ -117,7 +122,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + ps_strategy=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -126,6 +132,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.ps_strategy) def test_replace_with_disallowallowed_properties(self): config = run_config_lib.RunConfig() @@ -166,9 +173,12 @@ class 
RunConfigTest(test.TestCase): config.replace(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): config.replace(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR): + config.replace(ps_strategy=lambda x: 0) def test_init_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + ps_strategy = lambda op: 0 config = run_config_lib.RunConfig( tf_random_seed=11, @@ -176,13 +186,15 @@ class RunConfigTest(test.TestCase): save_checkpoints_secs=14, session_config=session_config, keep_checkpoint_max=16, - keep_checkpoint_every_n_hours=17) + keep_checkpoint_every_n_hours=17, + ps_strategy=ps_strategy) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(ps_strategy, config.ps_strategy) def test_init_none_value(self): config = run_config_lib.RunConfig( @@ -193,7 +205,8 @@ class RunConfigTest(test.TestCase): save_checkpoints_steps=None, session_config=None, keep_checkpoint_max=None, - keep_checkpoint_every_n_hours=None) + keep_checkpoint_every_n_hours=None, + ps_strategy=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -202,6 +215,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.ps_strategy) def test_init_invalid_values(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR): @@ -220,6 +234,8 @@ class RunConfigTest(test.TestCase): run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): 
run_config_lib.RunConfig(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR): + run_config_lib.RunConfig(ps_strategy=lambda x: 0) class RunConfigDistributedSettingTest(test.TestCase): -- GitLab From 69ac707731c32b0cb856bf4682aeee73c4391e9e Mon Sep 17 00:00:00 2001 From: "(David) Siu-Kei Muk" Date: Mon, 19 Feb 2018 12:32:18 +0800 Subject: [PATCH 0002/1357] 1. Moved estimator._device_fn to RunConfig as @property 2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used 3. Added some basic unit tests, may need further tests. --- tensorflow/python/estimator/estimator.py | 4 +- tensorflow/python/estimator/run_config.py | 66 +++++++++++++++++++ .../python/estimator/run_config_test.py | 16 +++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 5553c58f55..23fc75adeb 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -207,7 +207,7 @@ class Estimator(object): else: self._session_config = self._config.session_config - self._device_fn = _get_replica_device_setter(self._config) + # self._device_fn = _get_replica_device_setter(self._config) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') @@ -811,7 +811,7 @@ class Estimator(object): def _train_model(self, input_fn, hooks, saving_listeners): worker_hooks = [] - with ops.Graph().as_default() as g, g.device(self._device_fn): + with ops.Graph().as_default() as g, g.device(self._config.device_fn): # g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) training_util._get_or_create_global_step_read() # pylint: disable=protected-access diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 
646276abbe..14b4446601 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -28,12 +28,14 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.estimator import util +from tensorflow.python.training import training from tensorflow.python.util import compat_internal from tensorflow.python.util.tf_export import tf_export _USE_DEFAULT = object() _VALID_PS_STRATEGY_ARGS = set(['op']) +_VALID_DEVICE_FN_ARGS = set(['op']) # A list of the property names in RunConfig that the user is allowed to change. _DEFAULT_REPLACEABLE_LIST = [ @@ -46,6 +48,7 @@ _DEFAULT_REPLACEABLE_LIST = [ 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', 'log_step_count_steps', + 'device_fn', 'ps_strategy' ] @@ -281,6 +284,9 @@ def _validate_properties(run_config): _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types), message='tf_random_seed must be integer.') + _validate('device_fn', lambda device_fn: six.callable(device_fn) and + set(util.fn_args(device_fn)) == set(['op']), + message='device_fn must be callable with exactly one argument "op".') _validate('ps_strategy', lambda ps_strategy: six.callable(ps_strategy) and set(util.fn_args(ps_strategy)) == set(['op']), message='ps_strategy must be callable with exactly one argument "op".') @@ -308,6 +314,7 @@ class RunConfig(object): keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, log_step_count_steps=100, + device_fn=None, ps_strategy=None): """Constructs a RunConfig. @@ -432,6 +439,9 @@ class RunConfig(object): the feature. log_step_count_steps: The frequency, in number of global steps, that the global step/sec will be logged during training. + device_fn: A callable invoked for every `Operation` that takes the + `Operation` and returns the device string. 
If `None`, defaults to + device function returned by `tf.train.replica_device_setter`. ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by `ps_ops`), that takes the `Operation` and returns the ps task index to use. If `None`, defaults to a round-robin strategy across all `ps` @@ -473,6 +483,7 @@ class RunConfig(object): keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, log_step_count_steps=log_step_count_steps, + device_fn=device_fn, ps_strategy=ps_strategy) self._init_distributed_setting_from_environment_var(tf_config) @@ -575,6 +586,22 @@ class RunConfig(object): def cluster_spec(self): return self._cluster_spec + @property + def device_fn(self): + """Returns the device_fn. + + If the device_fn is None, the device function returned by + `training.replica_device_setter` is used. + If the device_fn is not None, it is returned directly. + + Returns: + None for non-distributed setting, device_fn otherwise. + """ + if self._device_fn is None: + return _get_replica_device_setter(self) + + return self._device_fn + @property def evaluation_master(self): return self._evaluation_master @@ -702,6 +729,8 @@ class RunConfig(object): - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, + - `device_fn`, + - `ps_strategy` In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). @@ -785,3 +814,40 @@ def _get_model_dir(tf_config, model_dir): logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config) return model_dir or model_dir_in_tf_config + + +def _get_replica_device_setter(config): + """Creates a replica device setter if required as a default device_fn. + + `Estimator` uses ReplicaDeviceSetter as a default device placer. It sets the + distributed related arguments such as number of ps_replicas based on given + config. + + Args: + config: A `RunConfig` instance. + + Returns: + A replica device setter, or None. 
+ """ + ps_ops = [ + 'Variable', 'VariableV2', 'AutoReloadVariable', 'MutableHashTable', + 'MutableHashTableV2', 'MutableHashTableOfTensors', + 'MutableHashTableOfTensorsV2', 'MutableDenseHashTable', + 'MutableDenseHashTableV2', 'VarHandleOp' + ] + + if config.task_type: + worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id) + else: + worker_device = '/job:worker' + + if config.num_ps_replicas > 0: + return training.replica_device_setter( + ps_tasks=config.num_ps_replicas, + worker_device=worker_device, + merge_devices=True, + ps_ops=ps_ops, + cluster=config.cluster_spec, + ps_strategy=config.ps_strategy) + else: + return None \ No newline at end of file diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py index 59652ef82d..12923c4373 100644 --- a/tensorflow/python/estimator/run_config_test.py +++ b/tensorflow/python/estimator/run_config_test.py @@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto' _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' +_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".' _PS_STRATEGY_ERR = 'ps_strategy must be callable with exactly one argument "op"' _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.' _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.' 
@@ -84,6 +85,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(5, config.keep_checkpoint_max) self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) + self.assertIsNone(config.device_fn) self.assertIsNone(config.ps_strategy) def test_model_dir(self): @@ -96,6 +98,7 @@ class RunConfigTest(test.TestCase): def test_replace_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) ps_strategy = lambda op: 0 + device_fn = lambda op: "/cpu:0" config = run_config_lib.RunConfig().replace( tf_random_seed=11, @@ -104,6 +107,7 @@ class RunConfigTest(test.TestCase): session_config=session_config, keep_checkpoint_max=16, keep_checkpoint_every_n_hours=17, + device_fn=device_fn, ps_strategy=ps_strategy) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) @@ -111,6 +115,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) self.assertEqual(ps_strategy, config.ps_strategy) def test_replace_none_value(self): @@ -123,6 +128,7 @@ class RunConfigTest(test.TestCase): session_config=None, keep_checkpoint_max=None, keep_checkpoint_every_n_hours=None, + device_fn=None, ps_strategy=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) @@ -132,6 +138,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) self.assertIsNone(config.ps_strategy) def test_replace_with_disallowallowed_properties(self): @@ -173,11 +180,14 @@ class RunConfigTest(test.TestCase): config.replace(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): 
config.replace(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + config.replace(device_fn=lambda x, y: 0) with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR): config.replace(ps_strategy=lambda x: 0) def test_init_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) + device_fn = lambda op: "/cpu:0" ps_strategy = lambda op: 0 config = run_config_lib.RunConfig( @@ -187,6 +197,7 @@ class RunConfigTest(test.TestCase): session_config=session_config, keep_checkpoint_max=16, keep_checkpoint_every_n_hours=17, + device_fn=device_fn, ps_strategy=ps_strategy) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) @@ -194,6 +205,7 @@ class RunConfigTest(test.TestCase): self.assertEqual(session_config, config.session_config) self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) + self.assertEqual(device_fn, config.device_fn) self.assertEqual(ps_strategy, config.ps_strategy) def test_init_none_value(self): @@ -206,6 +218,7 @@ class RunConfigTest(test.TestCase): session_config=None, keep_checkpoint_max=None, keep_checkpoint_every_n_hours=None, + device_fn=None, ps_strategy=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) @@ -215,6 +228,7 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.session_config) self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) + self.assertIsNone(config.device_fn) self.assertIsNone(config.ps_strategy) def test_init_invalid_values(self): @@ -234,6 +248,8 @@ class RunConfigTest(test.TestCase): run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0) with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR): run_config_lib.RunConfig(tf_random_seed=1.0) + with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): + run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0") with 
self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR): run_config_lib.RunConfig(ps_strategy=lambda x: 0) -- GitLab From 51115ee74ed5b64cc03f18d523d8d48f36ef27ba Mon Sep 17 00:00:00 2001 From: "(David) Siu-Kei Muk" Date: Sat, 24 Feb 2018 14:32:36 +0800 Subject: [PATCH 0003/1357] 1. Removing ps_strategy. 2. Modified estimator to take overriden device_fn from if set. 3. Removed ps_strategy related unit tests. --- tensorflow/python/estimator/estimator.py | 7 +- tensorflow/python/estimator/run_config.py | 78 +++---------------- .../python/estimator/run_config_test.py | 24 +----- 3 files changed, 17 insertions(+), 92 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 23fc75adeb..821cbc10d2 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -207,7 +207,7 @@ class Estimator(object): else: self._session_config = self._config.session_config - # self._device_fn = _get_replica_device_setter(self._config) + self._device_fn = self._config.device_fn or _get_replica_device_setter(self._config) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') @@ -811,7 +811,7 @@ class Estimator(object): def _train_model(self, input_fn, hooks, saving_listeners): worker_hooks = [] - with ops.Graph().as_default() as g, g.device(self._config.device_fn): # g.device(self._device_fn): + with ops.Graph().as_default() as g, g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) global_step_tensor = self._create_and_assert_global_step(g) training_util._get_or_create_global_step_read() # pylint: disable=protected-access @@ -1025,8 +1025,7 @@ def _get_replica_device_setter(config): worker_device=worker_device, merge_devices=True, ps_ops=ps_ops, - cluster=config.cluster_spec, - ps_strategy=config.ps_strategy) + cluster=config.cluster_spec) else: return None diff --git a/tensorflow/python/estimator/run_config.py 
b/tensorflow/python/estimator/run_config.py index 14b4446601..b06f212ac0 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -34,7 +34,6 @@ from tensorflow.python.util.tf_export import tf_export _USE_DEFAULT = object() -_VALID_PS_STRATEGY_ARGS = set(['op']) _VALID_DEVICE_FN_ARGS = set(['op']) # A list of the property names in RunConfig that the user is allowed to change. @@ -48,8 +47,7 @@ _DEFAULT_REPLACEABLE_LIST = [ 'keep_checkpoint_max', 'keep_checkpoint_every_n_hours', 'log_step_count_steps', - 'device_fn', - 'ps_strategy' + 'device_fn' ] _SAVE_CKPT_ERR = ( @@ -285,11 +283,8 @@ def _validate_properties(run_config): message='tf_random_seed must be integer.') _validate('device_fn', lambda device_fn: six.callable(device_fn) and - set(util.fn_args(device_fn)) == set(['op']), + set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS, message='device_fn must be callable with exactly one argument "op".') - _validate('ps_strategy', lambda ps_strategy: six.callable(ps_strategy) and - set(util.fn_args(ps_strategy)) == set(['op']), - message='ps_strategy must be callable with exactly one argument "op".') class TaskType(object): @@ -314,8 +309,7 @@ class RunConfig(object): keep_checkpoint_max=5, keep_checkpoint_every_n_hours=10000, log_step_count_steps=100, - device_fn=None, - ps_strategy=None): + device_fn=None): """Constructs a RunConfig. All distributed training related properties `cluster_spec`, `is_chief`, @@ -441,11 +435,8 @@ class RunConfig(object): global step/sec will be logged during training. device_fn: A callable invoked for every `Operation` that takes the `Operation` and returns the device string. If `None`, defaults to - device function returned by `tf.train.replica_device_setter`. - ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by - `ps_ops`), that takes the `Operation` and returns the ps task index to - use. If `None`, defaults to a round-robin strategy across all `ps` - devices. 
+ the device function returned by `tf.train.replica_device_setter` + with round-robin strategy. Raises: @@ -483,8 +474,7 @@ class RunConfig(object): keep_checkpoint_max=keep_checkpoint_max, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, log_step_count_steps=log_step_count_steps, - device_fn=device_fn, - ps_strategy=ps_strategy) + device_fn=device_fn) self._init_distributed_setting_from_environment_var(tf_config) @@ -590,16 +580,10 @@ class RunConfig(object): def device_fn(self): """Returns the device_fn. - If the device_fn is None, the device function returned by - `training.replica_device_setter` is used. - If the device_fn is not None, it is returned directly. - - Returns: - None for non-distributed setting, device_fn otherwise. + If device_fn is not `None`, it overrides the default + device function used in `Estimator`. + Otherwise the default one is used. """ - if self._device_fn is None: - return _get_replica_device_setter(self) - return self._device_fn @property @@ -622,10 +606,6 @@ class RunConfig(object): def num_worker_replicas(self): return self._num_worker_replicas - @property - def ps_strategy(self): - return self._ps_strategy - @property def task_id(self): return self._task_id @@ -729,8 +709,7 @@ class RunConfig(object): - `keep_checkpoint_max`, - `keep_checkpoint_every_n_hours`, - `log_step_count_steps`, - - `device_fn`, - - `ps_strategy` + - `device_fn` In addition, either `save_checkpoints_steps` or `save_checkpoints_secs` can be set (should not be both). @@ -814,40 +793,3 @@ def _get_model_dir(tf_config, model_dir): logging.info('Using model_dir in TF_CONFIG: %s', model_dir_in_tf_config) return model_dir or model_dir_in_tf_config - - -def _get_replica_device_setter(config): - """Creates a replica device setter if required as a default device_fn. - - `Estimator` uses ReplicaDeviceSetter as a default device placer. It sets the - distributed related arguments such as number of ps_replicas based on given - config. 
- - Args: - config: A `RunConfig` instance. - - Returns: - A replica device setter, or None. - """ - ps_ops = [ - 'Variable', 'VariableV2', 'AutoReloadVariable', 'MutableHashTable', - 'MutableHashTableV2', 'MutableHashTableOfTensors', - 'MutableHashTableOfTensorsV2', 'MutableDenseHashTable', - 'MutableDenseHashTableV2', 'VarHandleOp' - ] - - if config.task_type: - worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id) - else: - worker_device = '/job:worker' - - if config.num_ps_replicas > 0: - return training.replica_device_setter( - ps_tasks=config.num_ps_replicas, - worker_device=worker_device, - merge_devices=True, - ps_ops=ps_ops, - cluster=config.cluster_spec, - ps_strategy=config.ps_strategy) - else: - return None \ No newline at end of file diff --git a/tensorflow/python/estimator/run_config_test.py b/tensorflow/python/estimator/run_config_test.py index 12923c4373..c8b12605e1 100644 --- a/tensorflow/python/estimator/run_config_test.py +++ b/tensorflow/python/estimator/run_config_test.py @@ -43,7 +43,6 @@ _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0' _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0' _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer' _DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument "op".' -_PS_STRATEGY_ERR = 'ps_strategy must be callable with exactly one argument "op"' _ONE_CHIEF_ERR = 'The "cluster" in TF_CONFIG must have only one "chief" node.' _ONE_MASTER_ERR = 'The "cluster" in TF_CONFIG must have only one "master" node.' 
_INVALID_TASK_TYPE_FOR_EVAL_MASTER = ( @@ -86,7 +85,6 @@ class RunConfigTest(test.TestCase): self.assertEqual(10000, config.keep_checkpoint_every_n_hours) self.assertIsNone(config.service) self.assertIsNone(config.device_fn) - self.assertIsNone(config.ps_strategy) def test_model_dir(self): empty_config = run_config_lib.RunConfig() @@ -97,7 +95,6 @@ class RunConfigTest(test.TestCase): def test_replace_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) - ps_strategy = lambda op: 0 device_fn = lambda op: "/cpu:0" config = run_config_lib.RunConfig().replace( @@ -107,8 +104,7 @@ class RunConfigTest(test.TestCase): session_config=session_config, keep_checkpoint_max=16, keep_checkpoint_every_n_hours=17, - device_fn=device_fn, - ps_strategy=ps_strategy) + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) @@ -116,7 +112,6 @@ class RunConfigTest(test.TestCase): self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) self.assertEqual(device_fn, config.device_fn) - self.assertEqual(ps_strategy, config.ps_strategy) def test_replace_none_value(self): config = run_config_lib.RunConfig().replace( @@ -128,8 +123,7 @@ class RunConfigTest(test.TestCase): session_config=None, keep_checkpoint_max=None, keep_checkpoint_every_n_hours=None, - device_fn=None, - ps_strategy=None) + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -139,7 +133,6 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) self.assertIsNone(config.device_fn) - self.assertIsNone(config.ps_strategy) def test_replace_with_disallowallowed_properties(self): config = run_config_lib.RunConfig() @@ -182,13 +175,10 @@ class 
RunConfigTest(test.TestCase): config.replace(tf_random_seed=1.0) with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): config.replace(device_fn=lambda x, y: 0) - with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR): - config.replace(ps_strategy=lambda x: 0) def test_init_with_allowed_properties(self): session_config = config_pb2.ConfigProto(allow_soft_placement=True) device_fn = lambda op: "/cpu:0" - ps_strategy = lambda op: 0 config = run_config_lib.RunConfig( tf_random_seed=11, @@ -197,8 +187,7 @@ class RunConfigTest(test.TestCase): session_config=session_config, keep_checkpoint_max=16, keep_checkpoint_every_n_hours=17, - device_fn=device_fn, - ps_strategy=ps_strategy) + device_fn=device_fn) self.assertEqual(11, config.tf_random_seed) self.assertEqual(12, config.save_summary_steps) self.assertEqual(14, config.save_checkpoints_secs) @@ -206,7 +195,6 @@ class RunConfigTest(test.TestCase): self.assertEqual(16, config.keep_checkpoint_max) self.assertEqual(17, config.keep_checkpoint_every_n_hours) self.assertEqual(device_fn, config.device_fn) - self.assertEqual(ps_strategy, config.ps_strategy) def test_init_none_value(self): config = run_config_lib.RunConfig( @@ -218,8 +206,7 @@ class RunConfigTest(test.TestCase): session_config=None, keep_checkpoint_max=None, keep_checkpoint_every_n_hours=None, - device_fn=None, - ps_strategy=None) + device_fn=None) self.assertIsNone(config.tf_random_seed) self.assertIsNone(config.model_dir) self.assertIsNone(config.save_summary_steps) @@ -229,7 +216,6 @@ class RunConfigTest(test.TestCase): self.assertIsNone(config.keep_checkpoint_max) self.assertIsNone(config.keep_checkpoint_every_n_hours) self.assertIsNone(config.device_fn) - self.assertIsNone(config.ps_strategy) def test_init_invalid_values(self): with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR): @@ -250,8 +236,6 @@ class RunConfigTest(test.TestCase): run_config_lib.RunConfig(tf_random_seed=1.0) with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR): 
run_config_lib.RunConfig(device_fn=lambda x: "/cpu:0") - with self.assertRaisesRegexp(ValueError, _PS_STRATEGY_ERR): - run_config_lib.RunConfig(ps_strategy=lambda x: 0) class RunConfigDistributedSettingTest(test.TestCase): -- GitLab From 584e3495b50db8fd0a894de8b6d85fcf4268a855 Mon Sep 17 00:00:00 2001 From: Sunitha Kambhampati Date: Tue, 13 Mar 2018 11:43:01 -0700 Subject: [PATCH 0004/1357] Fix floating point exception with bps calculation modified: tensorflow/contrib/tensorboard/db/loader.cc --- tensorflow/contrib/tensorboard/db/loader.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc index 4d7337a53d..9134296c74 100644 --- a/tensorflow/contrib/tensorboard/db/loader.cc +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -112,8 +112,10 @@ int main(int argc, char* argv[]) { } uint64 elapsed = env->NowMicros() - start; LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with " - << AddCommas(records) << " records at " - << AddCommas(offset / (elapsed / 1000000)) << " bps"; + << AddCommas(records) << " records"; + if (elapsed > 0) { + LOG(INFO) << "bps=" << (uint64)(offset / (elapsed / 1000000.0)); + } return 0; } -- GitLab From 548415b9be78839a23a3909044329c3f221fa4b3 Mon Sep 17 00:00:00 2001 From: Sunitha Kambhampati Date: Wed, 28 Mar 2018 21:25:23 -0700 Subject: [PATCH 0005/1357] Use the same log line for bps and also report bps when elapsed is 0 --- tensorflow/contrib/tensorboard/db/loader.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc index 9134296c74..97b9daa361 100644 --- a/tensorflow/contrib/tensorboard/db/loader.cc +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -112,11 +112,10 @@ int main(int argc, char* argv[]) { } uint64 elapsed = env->NowMicros() - start; LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with 
" - << AddCommas(records) << " records"; - if (elapsed > 0) { - LOG(INFO) << "bps=" << (uint64)(offset / (elapsed / 1000000.0)); - } - + << AddCommas(records) << " records at " + << (elapsed == 0 ? offset : static_cast( + offset / (elapsed / 1000000.0))) + << " bps"; return 0; } -- GitLab From b621ac047e43540992b3ac0e9055b9e7225e74da Mon Sep 17 00:00:00 2001 From: Sunitha Kambhampati Date: Thu, 29 Mar 2018 11:51:02 -0700 Subject: [PATCH 0006/1357] Add the commas back --- tensorflow/contrib/tensorboard/db/loader.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc index 97b9daa361..6439328022 100644 --- a/tensorflow/contrib/tensorboard/db/loader.cc +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -111,11 +111,10 @@ int main(int argc, char* argv[]) { ++records; } uint64 elapsed = env->NowMicros() - start; + uint64 bps = (elapsed == 0 ? offset : static_cast( + offset / (elapsed / 1000000.0))); LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with " - << AddCommas(records) << " records at " - << (elapsed == 0 ? 
offset : static_cast( - offset / (elapsed / 1000000.0))) - << " bps"; + << AddCommas(records) << " records at " << AddCommas(bps) << " bps"; return 0; } -- GitLab From 203972b68a416725cd00fc3462345c9e7c0ebfa8 Mon Sep 17 00:00:00 2001 From: "(David) Siu-Kei Muk" Date: Tue, 3 Apr 2018 19:15:55 +0800 Subject: [PATCH 0007/1357] Adding manual initialization of _device_fn in legacy RunConfig class --- tensorflow/contrib/learn/python/learn/estimators/run_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index f3500bf56f..6d0f0b8da9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -299,6 +299,7 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig): # so instead of breaking compatibility with that assumption, we # just manually initialize this field: self._distribute = None + self._device_fn = None gpu_options = config_pb2.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) -- GitLab From cb54e6c766a152657c78cc4f91ebe81fc15b9b9c Mon Sep 17 00:00:00 2001 From: "(David) Siu-Kei Muk" Date: Thu, 5 Apr 2018 23:46:26 +0800 Subject: [PATCH 0008/1357] Updated estimator golden API through 1. bazel build //tensorflow/tools/api/tests:api_compatibility_test 2. 
bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True --- .../tools/api/golden/tensorflow.estimator.-run-config.pbtxt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt index 759ff752b0..6188840d90 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-run-config.pbtxt @@ -6,6 +6,10 @@ tf_class { name: "cluster_spec" mtype: "" } + member { + name: "device_fn" + mtype: "" + } member { name: "distribute" mtype: "" @@ -84,7 +88,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'distribute\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\'], " + argspec: "args=[\'self\', \'model_dir\', \'tf_random_seed\', \'save_summary_steps\', \'save_checkpoints_steps\', \'save_checkpoints_secs\', \'session_config\', \'keep_checkpoint_max\', \'keep_checkpoint_every_n_hours\', \'log_step_count_steps\', \'distribute\', \'device_fn\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'100\', \'\', \'\', \'None\', \'5\', \'10000\', \'100\', \'None\', \'None\'], " } member_method { name: "replace" -- GitLab From e5ff57da82742660262b7e77c2906d9621d0aaa3 Mon Sep 17 00:00:00 2001 From: "(David) Siu-Kei Muk" Date: Thu, 12 Apr 2018 22:59:05 +0800 Subject: [PATCH 0009/1357] fixing code styles --- tensorflow/python/estimator/estimator.py | 3 ++- tensorflow/python/estimator/run_config.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py 
b/tensorflow/python/estimator/estimator.py index 022dbde30f..c3f7c8de3f 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -216,7 +216,8 @@ class Estimator(object): else: self._session_config = self._config.session_config - self._device_fn = self._config.device_fn or _get_replica_device_setter(self._config) + self._device_fn = self._config.device_fn or \ + _get_replica_device_setter(self._config) if model_fn is None: raise ValueError('model_fn must be provided to Estimator.') diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index bb0d900be4..40f114b274 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -28,7 +28,6 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.estimator import util -from tensorflow.python.training import training from tensorflow.python.util import compat_internal from tensorflow.python.util.tf_export import tf_export @@ -284,8 +283,9 @@ def _validate_properties(run_config): message='tf_random_seed must be integer.') _validate('device_fn', lambda device_fn: six.callable(device_fn) and - set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS, - message='device_fn must be callable with exactly one argument "op".') + set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS, + message='device_fn must be callable with exactly' + ' one argument "op".') class TaskType(object): -- GitLab From c22d996c3d6a16db292bd3464b2ef7b91adae676 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 17 Apr 2018 01:00:44 +0800 Subject: [PATCH 0010/1357] Fix expand_dims of dims argument has been deprecated with axis --- .../contrib/layers/python/layers/target_column.py | 4 ++-- .../contrib/learn/python/learn/estimators/head.py | 10 +++++----- 
.../timeseries/state_space_models/state_space_model.py | 2 +- .../tools/compatibility/testdata/test_file_v0_11.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index 3e639a180e..f3377f2a05 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -396,7 +396,7 @@ class _BinarySvmTargetColumn(_MultiClassTargetColumn): def _mean_squared_loss(logits, target): # To prevent broadcasting inside "-". if len(target.get_shape()) == 1: - target = array_ops.expand_dims(target, dim=[1]) + target = array_ops.expand_dims(target, axis=1) logits.get_shape().assert_is_compatible_with(target.get_shape()) return math_ops.square(logits - math_ops.to_float(target)) @@ -405,7 +405,7 @@ def _mean_squared_loss(logits, target): def _log_loss_with_two_classes(logits, target): # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target. if len(target.get_shape()) == 1: - target = array_ops.expand_dims(target, dim=[1]) + target = array_ops.expand_dims(target, axis=1) loss_vec = nn.sigmoid_cross_entropy_with_logits( labels=math_ops.to_float(target), logits=logits) return loss_vec diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 2b4b6eff39..06f4173170 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -563,10 +563,10 @@ def _mean_squared_loss(labels, logits, weights=None): labels = ops.convert_to_tensor(labels) # To prevent broadcasting inside "-". if len(labels.get_shape()) == 1: - labels = array_ops.expand_dims(labels, dim=(1,)) + labels = array_ops.expand_dims(labels, axis=1) # TODO(zakaria): make sure it does not recreate the broadcast bug. 
if len(logits.get_shape()) == 1: - logits = array_ops.expand_dims(logits, dim=(1,)) + logits = array_ops.expand_dims(logits, axis=1) logits.get_shape().assert_is_compatible_with(labels.get_shape()) loss = math_ops.square(logits - math_ops.to_float(labels), name=name) return _compute_weighted_loss(loss, weights) @@ -579,10 +579,10 @@ def _poisson_loss(labels, logits, weights=None): labels = ops.convert_to_tensor(labels) # To prevent broadcasting inside "-". if len(labels.get_shape()) == 1: - labels = array_ops.expand_dims(labels, dim=(1,)) + labels = array_ops.expand_dims(labels, axis=1) # TODO(zakaria): make sure it does not recreate the broadcast bug. if len(logits.get_shape()) == 1: - logits = array_ops.expand_dims(logits, dim=(1,)) + logits = array_ops.expand_dims(logits, axis=1) logits.get_shape().assert_is_compatible_with(labels.get_shape()) loss = nn.log_poisson_loss(labels, logits, compute_full_loss=True, name=name) @@ -797,7 +797,7 @@ def _log_loss_with_two_classes(labels, logits, weights=None): # TODO(ptucker): This will break for dynamic shapes. # sigmoid_cross_entropy_with_logits requires [batch_size, 1] labels. 
if len(labels.get_shape()) == 1: - labels = array_ops.expand_dims(labels, dim=(1,)) + labels = array_ops.expand_dims(labels, axis=1) loss = nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits, name=name) return _compute_weighted_loss(loss, weights) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py index 951c6546d5..d04c721007 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py @@ -909,7 +909,7 @@ class StateSpaceModel(model.SequentialTimeSeriesModel): elif unbroadcasted_shape.ndims == 2: # Unbroadcasted shape [num features x state dimension] broadcasted_model = array_ops.tile( - array_ops.expand_dims(unbroadcasted_model, dim=0), + array_ops.expand_dims(unbroadcasted_model, axis=0), [array_ops.shape(times)[0], 1, 1]) elif unbroadcasted_shape.ndims == 3: broadcasted_model = unbroadcasted_model diff --git a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py index 01f37d8768..40526d930c 100644 --- a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py +++ b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py @@ -94,7 +94,7 @@ class TestUpgrade(test_util.TensorFlowTestCase): self.assertAllClose( tf.reduce_logsumexp(a, [0, 1]).eval(), 6.45619344711) self.assertAllEqual( - tf.expand_dims([[1, 2], [3, 4]], dim=1).eval(), + tf.expand_dims([[1, 2], [3, 4]], axis=1).eval(), [[[1, 2]], [[3, 4]]]) def testArgMinMax(self): -- GitLab From f35dc0a522ae630902baa5be16d2a53b59266770 Mon Sep 17 00:00:00 2001 From: Bruno Goncalves <882745+brunomorishita@users.noreply.github.com> Date: Sat, 28 Apr 2018 19:24:22 -0300 Subject: [PATCH 0011/1357] Fix cmake library path for libpng16.a --- 
tensorflow/contrib/cmake/external/png.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index ad2af01bc0..1a147e9c8e 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================== include (ExternalProject) +include (GNUInstallDirs) set(png_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/png_archive) set(png_URL https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz) @@ -35,7 +36,7 @@ if(WIN32) endif() endif() else() - set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng16.a) + set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/${CMAKE_INSTALL_LIBDIR}/libpng16.a) endif() set(png_HEADERS -- GitLab From 071e6175dcc130b4c623e849a380d6434289eb66 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 24 May 2018 15:47:00 +0200 Subject: [PATCH 0012/1357] Added the -Thost=x64 flag to cmake build instructions --- tensorflow/contrib/cmake/README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 0b79f718d4..5c203b777c 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -106,17 +106,6 @@ Step-by-step Windows build 1. Install the prerequisites detailed above, and set up your environment. - * The following commands assume that you are using the Windows Command - Prompt (`cmd.exe`). You will need to set up your environment to use the - appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets - we will build are too large for the 32-bit tools, and they will fail with - out-of-memory errors.) 
The typical command to do set up your - environment is: - - ``` - D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" - ``` - * When building with GPU support after installing the CUDNN zip file from NVidia, append its bin directory to your PATH environment variable. In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable. @@ -168,7 +157,7 @@ Step-by-step Windows build and must be the last character on each line. ``` - D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^ + D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^ More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib @@ -197,6 +186,10 @@ Step-by-step Windows build not currently supported, because it relies on a `Debug` library for Python (`python35d.lib`) that is not distributed by default. + The `-Thost=x64` flag will ensure that the 64 bit compiler and linker + is used when building. Without this flag, MSBuild will use the 32 bit + toolchain which is prone to compile errors such as "compiler out of heap space". + There are various options that can be specified when generating the solution and project files: @@ -263,6 +256,11 @@ Step-by-step Windows build 4. Invoke MSBuild to build TensorFlow. 
+ Set up the path to find MSbuild: + ``` + D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" + ``` + To build the C++ example program, which will be created as a `.exe` executable in the subdirectory `.\Release`: -- GitLab From 6890731b2693f6b71dedaca6b2eaf8b488226836 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 24 May 2018 15:47:22 +0200 Subject: [PATCH 0013/1357] increase minimum cmake version required to 3.8 --- tensorflow/contrib/cmake/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 0708d6b7b9..225c5e6227 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,5 +1,9 @@ # Minimum CMake required -cmake_minimum_required(VERSION 3.5) +if(WIN32) + cmake_minimum_required(VERSION 3.8) +else() + cmake_minimum_required(VERSION 3.5) +endif() # Project project(tensorflow C CXX) -- GitLab From f78fd433118830482dddbf6055751898a19265de Mon Sep 17 00:00:00 2001 From: jiefangxuanyan <505745416@qq.com> Date: Wed, 13 Jun 2018 17:28:23 +0800 Subject: [PATCH 0014/1357] Specify endianness in expected_result array to fix #15767. 
--- tensorflow/python/kernel_tests/decode_raw_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py index 122a9ed469..0bd8bc3c7b 100644 --- a/tensorflow/python/kernel_tests/decode_raw_op_test.py +++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py @@ -79,7 +79,7 @@ class DecodeRawOpTest(test.TestCase): decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16) self.assertEqual([None, None], decode.get_shape().as_list()) - expected_result = np.matrix([[1, -2, -3, 4]], dtype=np.float16) + expected_result = np.matrix([[1, -2, -3, 4]], dtype=" Date: Sun, 1 Jul 2018 01:13:06 +0800 Subject: [PATCH 0015/1357] Removed unused lambda capture --- tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc index f9f36443a8..6824e0f89f 100644 --- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -50,7 +50,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass { } for (Node* n : matches) { AttrSlice n_attrs = n->attrs(); - auto base_make_node = [n, g, &n_attrs](const string& op, + auto base_make_node = [n, &n_attrs](const string& op, const string& name) { NodeBuilder node_builder(name, op); node_builder.Device(n->requested_device()); -- GitLab From f7a00dbf1799f3fb3900b0788047e460a9abfd31 Mon Sep 17 00:00:00 2001 From: naurril Date: Sun, 1 Jul 2018 01:47:25 +0800 Subject: [PATCH 0016/1357] Removed unused lambda capture --- tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc 
b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc index 6824e0f89f..0f853ae52a 100644 --- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -60,7 +60,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass { } return node_builder; }; - auto make_node = [n, g, &n_attrs, &base_make_node](string op) { + auto make_node = [n, g, &base_make_node](string op) { return base_make_node( op, g->NewName(strings::StrCat(n->name(), "/Internal"))); }; -- GitLab From e5a7c13a8f15b0f98df849fbe3196f2ecedec04e Mon Sep 17 00:00:00 2001 From: naurril Date: Tue, 3 Jul 2018 00:21:25 +0800 Subject: [PATCH 0017/1357] cleanup CondContext at execption --- tensorflow/python/ops/control_flow_ops.py | 30 +++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index fc37805c79..386305ba30 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -2044,22 +2044,26 @@ def cond(pred, # Build the graph for the true branch in a new context. context_t = CondContext(pred, pivot_1, branch=1) - context_t.Enter() - orig_res_t, res_t = context_t.BuildCondBranch(true_fn) - if orig_res_t is None: - raise ValueError("true_fn must have a return value.") - context_t.ExitResult(res_t) - context_t.Exit() + try: + context_t.Enter() + orig_res_t, res_t = context_t.BuildCondBranch(true_fn) + if orig_res_t is None: + raise ValueError("true_fn must have a return value.") + context_t.ExitResult(res_t) + finally: + context_t.Exit() # Build the graph for the false branch in a new context. 
context_f = CondContext(pred, pivot_2, branch=0) - context_f.Enter() - orig_res_f, res_f = context_f.BuildCondBranch(false_fn) - if orig_res_f is None: - raise ValueError("false_fn must have a return value.") - context_f.ExitResult(res_f) - context_f.Exit() - + try: + context_f.Enter() + orig_res_f, res_f = context_f.BuildCondBranch(false_fn) + if orig_res_f is None: + raise ValueError("false_fn must have a return value.") + context_f.ExitResult(res_f) + finally: + context_f.Exit() + if not strict: orig_res_t = _UnpackIfSingleton(orig_res_t) orig_res_f = _UnpackIfSingleton(orig_res_f) -- GitLab From 9bab0c89c4ffeeb780e7a3dc415ab888164b9b00 Mon Sep 17 00:00:00 2001 From: "candy.dc" Date: Thu, 26 Jul 2018 11:36:30 +0800 Subject: [PATCH 0018/1357] fix: No need to convert to tensor when using ResourceVariable in embedding_lookup, because ResourceVariable support ResourceGather OP. --- tensorflow/contrib/layers/python/layers/embedding_ops.py | 7 ++++--- tensorflow/python/feature_column/feature_column_v2.py | 7 ++++--- tensorflow/python/ops/embedding_ops.py | 7 ++++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 60e1d85ea9..897aed527d 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -112,9 +112,10 @@ def safe_embedding_lookup_sparse(embedding_weights, dtype = sparse_weights.dtype if sparse_weights is not None else None if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable): + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] 
contrib_tensor_util.assert_same_float_dtype(embedding_weights + [sparse_weights]) diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index b4dd23f58d..220a4f7ed6 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -3283,9 +3283,10 @@ def _safe_embedding_lookup_sparse(embedding_weights, raise ValueError('Missing embedding_weights %s.' % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable): + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] with ops.name_scope(name, 'embedding_lookup', embedding_weights + [sparse_ids, diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 27c2fa7017..fe422f5095 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -545,9 +545,10 @@ def safe_embedding_lookup_sparse(embedding_weights, raise ValueError('Missing embedding_weights %s.' 
% embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable): + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] with ops.name_scope(name, 'embedding_lookup', embedding_weights + [sparse_ids, -- GitLab From aba7fcaf87f8d4099212db2e3bffad1dbab168a2 Mon Sep 17 00:00:00 2001 From: shaohua Date: Thu, 26 Jul 2018 15:00:53 +0800 Subject: [PATCH 0019/1357] Fix gcc6.3 build link issue Signed-off-by: shaohua --- tensorflow/tensorflow.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 340d3f393c..054d68d42c 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -404,7 +404,7 @@ def tf_gen_op_wrapper_cc(name, tf_cc_binary( name=tool, copts=tf_copts(), - linkopts=if_not_windows(["-lm"]), + linkopts=if_not_windows(["-lm","-Wl,-ldl"]), linkstatic=1, # Faster to link this one-time-use binary dynamically deps=[op_gen] + deps) @@ -573,7 +573,7 @@ def tf_gen_op_wrapper_py(name, deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))] tf_cc_binary( name=tool_name, - linkopts=if_not_windows(["-lm"]) + cc_linkopts, + linkopts=if_not_windows(["-lm","-Wl,-ldl"]) + cc_linkopts, copts=tf_copts(), linkstatic=1, # Faster to link this one-time-use binary dynamically deps=([ -- GitLab From 27de8e717c1bec91398f5a6be6c7287b657fc960 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 02:29:43 +0000 Subject: [PATCH 0020/1357] Improve shape function for CudnnRNNParamsSize In cudnn_rnn_ops.cc, the CudnnRNNParamsSize does not have restrictions on num_layers, num_units, and input_size, though they all should be scalars. This fix adds the shape check of num_layers, num_units, and input_size for CudnnRNNParamsSize. 
Signed-off-by: Yong Tang --- tensorflow/core/ops/cudnn_rnn_ops.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc index f78f7a897a..7eb141aa8c 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops.cc @@ -52,6 +52,12 @@ REGISTER_OP("CudnnRNNParamsSize") .Attr("seed2: int = 0") .Output("params_size: S") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + // num_layers, num_units, and input_size should be scalars. + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(0, c->Vector(1)); return Status::OK(); }); -- GitLab From 01387ccddcf5c23d48c5745f4a6a49a670f528aa Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 04:28:08 +0000 Subject: [PATCH 0021/1357] Add test cases for shape function of CudnnRNNParamsSize Signed-off-by: Yong Tang --- .../python/kernel_tests/cudnn_rnn_ops_test.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py index 5a667485be..675b7ce185 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py @@ -413,6 +413,28 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase): self._testOneLSTMParamsSize(num_layers, num_units, input_size, direction) + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testLSTMParamsSizeShape(self): + with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + constant_op.constant([4]), 200, 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = 
model.params_size() + with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, constant_op.constant([200]), 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, 200, constant_op.constant([200]), + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + class CudnnRNNTestInference(TensorFlowTestCase): -- GitLab From d27b5a3e5458c82ce1ca3cda1a9879149c779959 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 14:52:52 +0000 Subject: [PATCH 0022/1357] Pylint fix Signed-off-by: Yong Tang --- .../python/kernel_tests/cudnn_rnn_ops_test.py | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py index 675b7ce185..c59d3682d4 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py @@ -416,24 +416,27 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase): @unittest.skipUnless(test.is_built_with_cuda(), "Test only applicable when running on GPUs") def testLSTMParamsSizeShape(self): - with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): - model = _CreateModel( - cudnn_rnn_ops.CUDNN_LSTM, - constant_op.constant([4]), 200, 200, - direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) - params_size = model.params_size() - with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): - model = _CreateModel( - cudnn_rnn_ops.CUDNN_LSTM, - 4, constant_op.constant([200]), 200, - direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) - params_size = model.params_size() - with 
self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): - model = _CreateModel( - cudnn_rnn_ops.CUDNN_LSTM, - 4, 200, constant_op.constant([200]), - direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) - params_size = model.params_size() + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + constant_op.constant([4]), 200, 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, constant_op.constant([200]), 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, 200, constant_op.constant([200]), + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() class CudnnRNNTestInference(TensorFlowTestCase): -- GitLab From c86327921c6e5e918250652558e4075abd88c6f4 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 14:53:02 +0000 Subject: [PATCH 0023/1357] Add additional unit test in c++ for cudnn_rnn_ops Signed-off-by: Yong Tang --- tensorflow/core/ops/cudnn_rnn_ops_test.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc index 2dd867561b..095ee1fc95 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc @@ -26,7 +26,19 @@ namespace tensorflow { TEST(CudnnRNNOpsTest, ParamsSize_ShapeFn) { ShapeInferenceTestOp op("CudnnRNNParamsSize"); - INFER_OK(op, "[1];[1];[1]", "[1]"); + INFER_OK(op, "[];[];[]", "[1]"); + INFER_OK(op, "?;[];[]", "[1]"); + INFER_OK(op, "[];?;[]", "[1]"); + INFER_OK(op, "[];[];?", "[1]"); + INFER_OK(op, 
"[];?;?", "[1]"); + INFER_OK(op, "?;?;?", "[1]"); + + INFER_ERROR("Shape must be rank 0 ", op, + "[1,2];?;[]"); + INFER_ERROR("Shape must be rank 0 ", op, + "?;[2];[]"); + INFER_ERROR("Shape must be rank 0 ", op, + "?;?;[1]"); } TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) { -- GitLab From 2e436951bb63a0294848b6f6d3746e449a305ad1 Mon Sep 17 00:00:00 2001 From: Stefan Dyulgerov Date: Tue, 17 Jul 2018 22:37:19 +0300 Subject: [PATCH 0024/1357] version_info.cc generated only once version_info.cc in the cmake files is generated every time when we build tensorflow and this forces rebuild of the whole project, since it is in the core library. added make.bat for windows, which does the same as make.sh to be executed easily from a build machine. the default now is visual studio 17 --- tensorflow/contrib/cmake/make.bat | 38 +++++++++++++++++++ .../contrib/cmake/tf_core_framework.cmake | 23 +++++++---- 2 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 tensorflow/contrib/cmake/make.bat diff --git a/tensorflow/contrib/cmake/make.bat b/tensorflow/contrib/cmake/make.bat new file mode 100644 index 0000000000..d52b24e01d --- /dev/null +++ b/tensorflow/contrib/cmake/make.bat @@ -0,0 +1,38 @@ +%echo off + +cd /d %~dp0 + +if exist _build rd /s /q _build + +mkdir _build +chdir _build + + +rem cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install + +CALL :NORMALIZEPATH "..\..\..\.." 
+SET SOURCE_DIR=%RETVAL% + +echo %SOURCE_DIR% + +SET SOURCE_DIR=F:\frameworks\tensorflow\ + +CALL :NORMALIZEPATH "../../../tools/git/gen_git_source.py" +SET SOURCE_PYTHON_SCRIPT=%RETVAL% + +CALL :NORMALIZEPATH "../../../core/util/version_info.cc" +SET SOURCE_VERSION_CC=%RETVAL% + +python %SOURCE_PYTHON_SCRIPT% --raw_generate %SOURCE_VERSION_CC% --source_dir %SOURCE_DIR% --git_tag_override= + +cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install + +EXIT /B + +:NORMALIZEPATH + SET RETVAL=%~dpfn1 + EXIT /B + + + + \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 067c299a71..7e806685b8 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -258,14 +258,21 @@ add_dependencies(tf_core_lib ${tensorflow_EXTERNAL_DEPENDENCIES} tf_protos_cc) # force_rebuild always runs forcing ${VERSION_INFO_CC} target to run # ${VERSION_INFO_CC} would cache, but it depends on a phony never produced # target. 
-set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) -add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC}) -add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) -add_custom_command(OUTPUT - ${VERSION_INFO_CC} - COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} - DEPENDS __force_rebuild) +# This code forces rebuild every time, not needed as version from git is fetched only once +# move to make.bat which mimicks make.sh + +if (NOT WIN32) + + set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) + add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC}) + add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) + add_custom_command(OUTPUT + ${VERSION_INFO_CC} + COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py + ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} + DEPENDS __force_rebuild) +endif() + set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) ######################################################## -- GitLab From 0d7b11f4d63f9bae0d0e4001dd96ce840810210b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 5 Aug 2018 17:23:47 +0000 Subject: [PATCH 0025/1357] Fix op_scope warning in adjust_gamma While running the following op_scope causes the warning: ``` Python 3.5.2 (default, Nov 23 2017, 16:37:01) [GCC 5.4.0 20160609] on linux Type "help", "copyright", "credits" or "license" for more information. 
>>> import tensorflow as tf i>>> import numpy as np >>> tf.image.adjust_gamma(np.random.uniform(0.0, 255.0, (8, 8)), gamma=1) WARNING:tensorflow:tf.op_scope(values, name, default_name) is deprecated, use tf.name_scope(name, default_name, values) >>> ``` This fix fixes the warning by switching op_scope to name_scope. Signed-off-by: Yong Tang --- tensorflow/python/ops/image_ops_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 855a4d0c33..1b11b8b074 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1377,7 +1377,7 @@ def adjust_gamma(image, gamma=1, gain=1): [1] http://en.wikipedia.org/wiki/Gamma_correction """ - with ops.op_scope([image, gamma, gain], None, 'adjust_gamma'): + with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name: # Convert pixel value to DT_FLOAT for computing adjusted image. img = ops.convert_to_tensor(image, name='img', dtype=dtypes.float32) # Keep image dtype for computing the scale of corresponding dtype. 
-- GitLab From b81f4bb5468b0fdf9e36591d3a7d56740bedb7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 9 Aug 2018 14:34:52 +0800 Subject: [PATCH 0026/1357] ENH: implement feature importances --- .../python/estimator/canned/boosted_trees.py | 105 +++++++++++++++++- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 8b423f76de..060f5cb3fa 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -21,6 +21,11 @@ import abc import collections import functools +import numpy as np + +from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.client import session as tf_session +from tensorflow.python.eager import context from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib @@ -38,7 +43,9 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary +from tensorflow.python.training import checkpoint_management from tensorflow.python.training import distribute as distribute_lib +from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import estimator_export @@ -54,6 +61,8 @@ _HOLD_FOR_MULTI_DIM_SUPPORT = object() _DUMMY_NUM_BUCKETS = -1 _DUMMY_NODE_ID = -1 +_BOOSTED_TREES_SERIALIZED_PROTO = '_BOOSTED_TREES_SERIALIZED_PROTO' + def _get_transformed_features(features, sorted_feature_columns): """Gets the transformed features from features/feature_columns pair. 
@@ -736,6 +745,8 @@ def _bt_model_fn( bucketized_features=input_feature_list, logits_dimension=head.logits_dimension) else: + _, serialized_proto = tree_ensemble.serialize() + ops.add_to_collection(_BOOSTED_TREES_SERIALIZED_PROTO, serialized_proto) if is_single_machine: local_tree_ensemble = tree_ensemble ensemble_reload = control_flow_ops.no_op() @@ -910,8 +921,92 @@ def _create_regression_head(label_dimension, weight_column=None): # pylint: enable=protected-access +def _compute_feature_importance_for_tree(tree, num_features, normalize): + importances = np.zeros(num_features) + + for node in tree.nodes: + node_type = node.WhichOneof('node') + if node_type == 'bucketized_split': + feature_id = node.bucketized_split.feature_id + importances[feature_id] += node.metadata.gain + elif node_type == 'leaf': + assert node.metadata.gain == 0 + else: + raise ValueError('Unexpected split type %s', node_type) + + if normalize: + normalizer = np.sum(importances) + if normalizer > 0.0: + # Avoid dividing by zero (e.g., when root is pure) + importances /= normalizer + + return importances + + +def compute_feature_importances(tree_ensemble, num_features, normalize=True): + tree_importances = [_compute_feature_importance_for_tree(tree, + num_features, + normalize) + for tree in tree_ensemble.trees] + tree_importances = np.array(tree_importances) + tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) + feature_importances = np.sum(tree_importances * tree_weights, + axis=0) / np.sum(tree_weights) + if normalize: + normalizer = np.sum(feature_importances) + if normalizer > 0.0: + feature_importances /= normalizer + + sorted_feature = np.argsort(feature_importances)[::-1] + return sorted_feature, feature_importances[sorted_feature] + + +class _BoostedTrees(estimator.Estimator): + + def __init__(self, model_fn, model_dir, config, feature_columns): + super(_BoostedTrees, self).__init__( + model_fn=model_fn, model_dir=model_dir, config=config) + + sorted_feature_columns = 
sorted(feature_columns, key=lambda tc: tc.name) + self._num_features = _calculate_num_features(sorted_feature_columns) + + def compute_feature_importances(self, normalize=True): + tree_ensemble = self._read_tree_ensemble_from_checkpoint() + if tree_ensemble: + return compute_feature_importances(tree_ensemble, + self._num_features, + normalize) + else: + return [], [] + + def _read_tree_ensemble_from_checkpoint(self): + with context.graph_mode(): + checkpoint_path = checkpoint_management.latest_checkpoint( + self._model_dir) + if not checkpoint_path: + raise ValueError("Couldn't find trained model at %s." % self._model_dir) + + with ops.Graph().as_default() as g: + with tf_session.Session(config=self._session_config) as session: + meta_file = checkpoint_path + '.meta' + graph_saver = saver.import_meta_graph(meta_file) + graph_saver.restore(session, checkpoint_path) + + serialized_proto = ops.get_collection(_BOOSTED_TREES_SERIALIZED_PROTO) + assert len(serialized_proto) == 1 + serialized_proto_string = session.run(serialized_proto[0]) + + if serialized_proto_string: + tree_ensemble = boosted_trees_pb2.TreeEnsemble() + tree_ensemble.ParseFromString(serialized_proto_string) + return tree_ensemble + else: + # serialized_proto_string is empty string before training. + return None + + @estimator_export('estimator.BoostedTreesClassifier') -class BoostedTreesClassifier(estimator.Estimator): +class BoostedTreesClassifier(_BoostedTrees): """A Classifier for Tensorflow Boosted Trees models. 
@compatibility(eager) @@ -1046,11 +1141,12 @@ class BoostedTreesClassifier(estimator.Estimator): closed_form_grad_and_hess_fn=closed_form) super(BoostedTreesClassifier, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) + model_fn=_model_fn, model_dir=model_dir, config=config, + feature_columns=feature_columns) @estimator_export('estimator.BoostedTreesRegressor') -class BoostedTreesRegressor(estimator.Estimator): +class BoostedTreesRegressor(_BoostedTrees): """A Regressor for Tensorflow Boosted Trees models. @compatibility(eager) @@ -1169,4 +1265,5 @@ class BoostedTreesRegressor(estimator.Estimator): n_batches_per_layer, config) super(BoostedTreesRegressor, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) + model_fn=_model_fn, model_dir=model_dir, config=config, + feature_columns=feature_columns) -- GitLab From 54fbe83c1bc50510a7712ab78aaf369ba562538e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 9 Aug 2018 14:35:19 +0800 Subject: [PATCH 0027/1357] TST: add test case --- .../estimator/canned/boosted_trees_test.py | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index ec597e4686..054d820527 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -91,6 +91,17 @@ def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None): return _input_fn +def _compute_feature_importances_np(feature_gains, normalize): + if normalize: + feature_gains /= np.sum(feature_gains, axis=1, keepdims=True) + feature_gains = np.nan_to_num(feature_gains) + feature_importances = np.sum(feature_gains, axis=0) / len(feature_gains) + feature_importances /= np.sum(feature_importances) + return np.nan_to_num(feature_importances) + else: + return np.sum(feature_gains, 
axis=0) / len(feature_gains) + + class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): def setUp(self): @@ -154,6 +165,10 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) + self.assertEqual(3, est._num_features) # pylint:disable=protected-access + sorted_features, importances = est.compute_feature_importances() + self.assertAllEqual([], sorted_features) + self.assertAllEqual([], importances) def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -544,6 +559,85 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) + def testCalculateFeatureImportances(self): + input_fn = _make_train_input_fn(is_classification=True) + + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + self.assertEqual(3, est._num_features) # pylint:disable=protected-access + # It will stop after 5 steps because of the max depth and num trees. + num_steps = 100 + # Train for a few steps, and validate final checkpoint. + est.train(input_fn, steps=num_steps) + + # TreeEnsemble Proto: + # tree_ensemble: trees { + # nodes { + # bucketized_split { + # feature_id: 2 + # threshold: 2 + # left_id: 1 + # right_id: 2 + # } + # metadata { + # gain: 0.426666676998 + # } + # } + # ...... + # nodes { + # bucketized_split { + # threshold: 1 + # left_id: 5 + # right_id: 6 + # } + # metadata { + # gain: 0.133481562138 + # original_leaf { + # scalar: 0.066666662693 + # } + # } + # } + # ...... 
+ # nodes { + # bucketized_split { + # left_id: 11 + # right_id: 12 + # } + # metadata { + # gain: 0.400360047817 + # original_leaf { + # scalar: 0.0599950700998 + # } + # } + # } + # } + # trees { + # nodes { + # leaf { + # } + # } + # } + # tree_weights: 1.0 + # tree_weights: 1.0 + # ...... + sorted_features_expected = [0, 2, 1] + feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. + [0.0, 0.0, 0.0]] # 2nd tree. + + sorted_features, importances = est.compute_feature_importances(normalize=False) + self.assertAllEqual(sorted_features_expected, sorted_features) + self.assertAllClose(_compute_feature_importances_np(feature_gains, False), + importances) + + sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + self.assertAllEqual(sorted_features_expected, sorted_features1) + self.assertAllClose(_compute_feature_importances_np(feature_gains, True), + importances1) + class ModelFnTests(test_util.TensorFlowTestCase): """Tests bt_model_fn including unexposed internal functionalities.""" -- GitLab From b127c201cda558db21ce5f48f5899593d73da46b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 10 Aug 2018 20:37:32 +0000 Subject: [PATCH 0028/1357] Fix clang-format issue in `Experimental clang-format Check` Signed-off-by: Yong Tang --- tensorflow/core/ops/cudnn_rnn_ops.cc | 3 --- tensorflow/core/ops/cudnn_rnn_ops_test.cc | 9 +++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc index 7eb141aa8c..f84142c992 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops.cc @@ -37,7 +37,6 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; - REGISTER_OP("CudnnRNNParamsSize") .Input("num_layers: int32") .Input("num_units: int32") @@ -62,7 +61,6 @@ REGISTER_OP("CudnnRNNParamsSize") return Status::OK(); }); - REGISTER_OP("CudnnRNN") 
.Input("input: T") .Input("input_h: T") @@ -254,7 +252,6 @@ REGISTER_OP("CudnnRNNParamsToCanonical") return Status::OK(); }); - REGISTER_OP("CudnnRNNCanonicalToParams") .Input("num_layers: int32") .Input("num_units: int32") diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc index 095ee1fc95..13c3b933f4 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc @@ -33,12 +33,9 @@ TEST(CudnnRNNOpsTest, ParamsSize_ShapeFn) { INFER_OK(op, "[];?;?", "[1]"); INFER_OK(op, "?;?;?", "[1]"); - INFER_ERROR("Shape must be rank 0 ", op, - "[1,2];?;[]"); - INFER_ERROR("Shape must be rank 0 ", op, - "?;[2];[]"); - INFER_ERROR("Shape must be rank 0 ", op, - "?;?;[1]"); + INFER_ERROR("Shape must be rank 0 ", op, "[1,2];?;[]"); + INFER_ERROR("Shape must be rank 0 ", op, "?;[2];[]"); + INFER_ERROR("Shape must be rank 0 ", op, "?;?;[1]"); } TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) { -- GitLab From 29f596cf21f0332c1e2ece8798fdd9fefd2ba947 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 14:04:59 +0000 Subject: [PATCH 0029/1357] Improve the shape function of Bincount There was not a lot of restriction in shape function of Bincount and the output shape was unknown. It is actually possible to get a better shape output if `size` input is known. This fix adds enhancement to the shape function of Bincount. Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 1667c398f4..7d0f29368b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1416,6 +1416,10 @@ REGISTER_OP("Bincount") .Attr("T: {int32, int64, float32, float64}") .Output("bins: T") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + // The input `size` must be a scalar. 
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + c->set_output(0, c->UnknownShapeOfRank(1)); return Status::OK(); }); -- GitLab From 740c58b6fa5b6e1c85f688fbda322da0231aa169 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 14:44:44 +0000 Subject: [PATCH 0030/1357] Return `[size]` shape if size is known for Bincount. Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 7d0f29368b..b57385f63b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1420,7 +1420,19 @@ REGISTER_OP("Bincount") // The input `size` must be a scalar. TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - c->set_output(0, c->UnknownShapeOfRank(1)); + const Tensor* size_tensor = c->input_tensor(1); + if (size_tensor == nullptr) { + // Return unknown shape if size is not known. + c->set_output(0, c->UnknownShapeOfRank(1)); + return Status::OK(); + } + + // Return `[size]` shape if size is known. 
+ int32 size_val = size_tensor->scalar()(); + if (size_val < 0) { + return errors::InvalidArgument("size (", size_val, ") must be non-negative"); + } + c->set_output(0, c->MakeShape({size_val})); return Status::OK(); }); -- GitLab From e6981fc2225a529427391e98f492eee7bb865988 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 11 Aug 2018 18:39:13 +0000 Subject: [PATCH 0031/1357] Add additional test cases for Bincount Shape function, and fix clang-format issue Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 3 ++- tensorflow/core/ops/math_ops_test.cc | 12 ++++++++++++ .../python/kernel_tests/bincount_op_test.py | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index b57385f63b..0ba4a9a005 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1430,7 +1430,8 @@ REGISTER_OP("Bincount") // Return `[size]` shape if size is known. int32 size_val = size_tensor->scalar()(); if (size_val < 0) { - return errors::InvalidArgument("size (", size_val, ") must be non-negative"); + return errors::InvalidArgument("size (", size_val, + ") must be non-negative"); } c->set_output(0, c->MakeShape({size_val})); return Status::OK(); diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc index 23f1538912..7bf7c476f4 100644 --- a/tensorflow/core/ops/math_ops_test.cc +++ b/tensorflow/core/ops/math_ops_test.cc @@ -558,4 +558,16 @@ TEST(MathOpsTest, QuantizedAdd_ShapeFn) { INFER_ERROR("must be rank 0", op, "?;?;?;?;[3];?"); INFER_ERROR("must be rank 0", op, "?;?;?;?;?;[4]"); } + +TEST(MathOpsTest, Bincount_ShapeFn) { + ShapeInferenceTestOp op("Bincount"); + + // size should be scalar. 
+ INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;[1];?"); + + INFER_OK(op, "?;?;?", "[?]"); + INFER_OK(op, "?;[];?", "[?]"); + INFER_OK(op, "[?];[];?", "[?]"); + INFER_OK(op, "[?];[];[?]", "[?]"); +} } // end namespace tensorflow diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 2767df127e..15d9de56db 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -22,6 +22,8 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -97,6 +99,23 @@ class BincountTest(test_util.TensorFlowTestCase): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() + def test_shape_function(self): + # size must be scalar. + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1 for 'Bincount'"): + gen_math_ops.bincount([1, 2, 3, -1, 6, 8], [1], []) + # size must be positive. + with self.assertRaisesRegexp( + ValueError, "must be non-negative"): + gen_math_ops.bincount([1, 2, 3, -1, 6, 8], -5, []) + # if size is a constant then the shape is known. + v1 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], 5, []) + self.assertAllEqual(v1.get_shape().as_list(), [5]) + # if size is a placeholder then the shape is unknown. 
+ s = array_ops.placeholder(dtype=dtypes.int32) + v2 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], s, []) + self.assertAllEqual(v2.get_shape().as_list(), [None]) + if __name__ == "__main__": googletest.main() -- GitLab From 7ad604778ed69303458145376f2b6ec403fc5345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Aug 2018 15:57:54 +0800 Subject: [PATCH 0032/1357] ENH: mapping idx to feature_name --- .../python/estimator/canned/boosted_trees.py | 38 ++++++-- .../estimator/canned/boosted_trees_test.py | 88 +++++++++++++++++-- 2 files changed, 113 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 060f5cb3fa..ba90b361b3 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -201,6 +201,23 @@ def _calculate_num_features(sorted_feature_columns): return num_features +def _generate_feature_name_for_index(sorted_feature_columns): + names = [] + for column in sorted_feature_columns: + if isinstance(column, feature_column_lib._IndicatorColumn): # pylint:disable=protected-access + categorical_column = column.categorical_column + if isinstance(categorical_column, + feature_column_lib._VocabularyListCategoricalColumn): # pylint:disable=protected-access + for voc in categorical_column.vocabulary_list: + names.append('{}:{}'.format(column.name, voc)) + else: + for num in categorical_column._num_buckets: # pylint:disable=protected-access + names.append('{}:{}'.format(column.name, num)) + else: + names.append(column.name) + return names + + def _cache_transformed_features(features, sorted_feature_columns, batch_size): """Transform features and cache, then returns (cached_features, cache_op).""" num_features = _calculate_num_features(sorted_feature_columns) @@ -943,7 +960,9 @@ def _compute_feature_importance_for_tree(tree, num_features, normalize): return 
importances -def compute_feature_importances(tree_ensemble, num_features, normalize=True): +def compute_feature_importances(tree_ensemble, + num_features, + normalize=True): tree_importances = [_compute_feature_importance_for_tree(tree, num_features, normalize) @@ -957,8 +976,8 @@ def compute_feature_importances(tree_ensemble, num_features, normalize=True): if normalizer > 0.0: feature_importances /= normalizer - sorted_feature = np.argsort(feature_importances)[::-1] - return sorted_feature, feature_importances[sorted_feature] + sorted_feature_idx = np.argsort(feature_importances)[::-1] + return sorted_feature_idx, feature_importances[sorted_feature_idx] class _BoostedTrees(estimator.Estimator): @@ -967,15 +986,18 @@ class _BoostedTrees(estimator.Estimator): super(_BoostedTrees, self).__init__( model_fn=model_fn, model_dir=model_dir, config=config) - sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) - self._num_features = _calculate_num_features(sorted_feature_columns) + self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) def compute_feature_importances(self, normalize=True): tree_ensemble = self._read_tree_ensemble_from_checkpoint() if tree_ensemble: - return compute_feature_importances(tree_ensemble, - self._num_features, - normalize) + num_features = _calculate_num_features(self._sorted_feature_columns) + names_for_idx = np.array( + _generate_feature_name_for_index(self._sorted_feature_columns)) + idx, importances = compute_feature_importances(tree_ensemble, + num_features, + normalize) + return names_for_idx[idx], importances else: return [], [] diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 054d820527..880f0f10ba 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -165,7 +165,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 
predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - self.assertEqual(3, est._num_features) # pylint:disable=protected-access sorted_features, importances = est.compute_feature_importances() self.assertAllEqual([], sorted_features) self.assertAllEqual([], importances) @@ -568,7 +567,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): n_trees=1, max_depth=5) - self.assertEqual(3, est._num_features) # pylint:disable=protected-access # It will stop after 5 steps because of the max depth and num trees. num_steps = 100 # Train for a few steps, and validate final checkpoint. @@ -624,17 +622,97 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): # tree_weights: 1.0 # tree_weights: 1.0 # ...... - sorted_features_expected = [0, 2, 1] + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. [0.0, 0.0, 0.0]] # 2nd tree. 
sorted_features, importances = est.compute_feature_importances(normalize=False) - self.assertAllEqual(sorted_features_expected, sorted_features) + self.assertAllEqual(feature_names_expected, sorted_features) self.assertAllClose(_compute_feature_importances_np(feature_gains, False), importances) sorted_features1, importances1 = est.compute_feature_importances(normalize=True) - self.assertAllEqual(sorted_features_expected, sorted_features1) + self.assertAllEqual(feature_names_expected, sorted_features1) + self.assertAllClose(_compute_feature_importances_np(feature_gains, True), + importances1) + + def testCalculateFeatureImportancesWithIndicatorColumn(self): + categorical = feature_column.categorical_column_with_vocabulary_list( + key='categorical', vocabulary_list=('bad', 'good', 'ok')) + feature_indicator = feature_column.indicator_column(categorical) + bucketized_col = feature_column.bucketized_column( + feature_column.numeric_column( + 'an_uninformative_feature', dtype=dtypes.float32), + BUCKET_BOUNDARIES) + + labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32) + # Our categorical feature defines the labels perfectly + input_fn = numpy_io.numpy_input_fn( + x={ + 'an_uninformative_feature': np.array([1, 1, 1, 1, 1]), + 'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']), + }, + y=labels, + batch_size=5, + shuffle=False) + + # Train depth 1 tree. 
+ est = boosted_trees.BoostedTreesRegressor( + feature_columns=[bucketized_col, feature_indicator], + n_batches_per_layer=1, + n_trees=1, + learning_rate=1.0, + max_depth=1) + + num_steps = 1 + est.train(input_fn, steps=num_steps) + + # TreeEnsemble Proto: + # trees { + # nodes { + # bucketized_split { + # feature_id: 2 + # left_id: 1 + # right_id: 2 + # } + # metadata { + # gain: 15.5952005386 + # } + # } + # nodes { + # leaf { + # } + # } + # nodes { + # leaf { + # scalar: 5.7000002861 + # } + # } + # } + # trees { + # nodes { + # leaf { + # } + # } + # } + # tree_weights: 1.0 + # tree_weights: 1.0 + feature_names_expected = ['categorical_indicator:good', + # Reverse order because feature importances + # are sorted by np.argsort(f)[::-1] + 'categorical_indicator:ok', + 'categorical_indicator:bad', + 'an_uninformative_feature_bucketized'] + feature_gains = [[15.5952005386, 0.0, 0.0, 0.0], # 1st tree. + [0.0, 0.0, 0.0, 0.0]] # 2nd tree. + + sorted_features, importances = est.compute_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, sorted_features) + self.assertAllClose(_compute_feature_importances_np(feature_gains, False), + importances) + + sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, sorted_features1) self.assertAllClose(_compute_feature_importances_np(feature_gains, True), importances1) -- GitLab From aa25cc078c9b55e5ca3e0f59df43e169bfee8f3c Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Thu, 16 Aug 2018 19:04:37 +0800 Subject: [PATCH 0033/1357] Add LeakyRelu C++ Op and its gradient implementation. LeakyRelu, defined as 'y = { x (x>=0) or alpha*x (x<0) }', was computed by combined Ops 'max(x, alpha*x)' in current codes. Hence its gradient calculation for back propagation would contain a serial of element-wise Ops. This looks really unnecessary for such a simple op and it could be done within just one Op with less memory accesses. 
--- tensorflow/cc/gradients/nn_grad.cc | 13 ++ tensorflow/cc/gradients/nn_grad_test.cc | 13 ++ tensorflow/core/kernels/relu_op.cc | 153 +++++++++++------- tensorflow/core/kernels/relu_op.h | 59 +++++++ tensorflow/core/kernels/relu_op_functor.h | 31 ++++ tensorflow/core/kernels/relu_op_gpu.cu.cc | 18 ++- tensorflow/core/ops/nn_ops.cc | 15 ++ tensorflow/core/ops/ops.pbtxt | 68 ++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 2 + .../python/kernel_tests/relu_op_test.py | 113 +++++++++++++ tensorflow/python/ops/nn_grad.py | 15 ++ tensorflow/python/ops/nn_ops.py | 3 +- 12 files changed, 432 insertions(+), 71 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 588e96cb19..0fc23d0bf7 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -143,6 +143,19 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper); +Status LeakyReluGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + attrs.Alpha(alpha); + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index aa72cf7ba2..5ebece7b6e 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -41,6 +41,7 @@ using ops::MaxPoolV2; using ops::Placeholder; using ops::Relu; using ops::Relu6; +using ops::LeakyRelu; using ops::Selu; using ops::Softmax; using ops::Softplus; @@ -160,6 +161,18 @@ 
TEST_F(NNGradTest, Relu6Grad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = LeakyRelu(scope_, x); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index d52358737f..c4f2ef5632 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -33,19 +33,25 @@ typedef Eigen::GpuDevice GPUDevice; typedef Eigen::SyclDevice SYCLDevice; #endif // TENSORFLOW_USE_SYCL -#define REGISTER_RELU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_CPU).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_CPU).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint("T"), \ - Relu6GradOp) +#define REGISTER_RELU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_CPU).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_CPU).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint("T"), \ + Relu6GradOp) \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + 
Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + LeakyReluGradOp); TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS); #undef REGISTER_RELU_KERNELS @@ -99,6 +105,19 @@ namespace functor { extern template struct Relu6Grad; \ \ template <> \ + void LeakyRelu::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor features, \ + T alpha, typename TTypes::Tensor activations); \ + extern template struct LeakyRelu; \ + \ + template <> \ + void LeakyReluGrad::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor gradients, \ + typename TTypes::ConstTensor features, \ + T alpha, typename TTypes::Tensor backprops); \ + extern template struct LeakyReluGrad; \ + \ + template <> \ void Elu::operator()(const GPUDevice& d, \ typename TTypes::ConstTensor features, \ typename TTypes::Tensor activations); \ @@ -128,30 +147,36 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); } // namespace functor // Registration of the GPU implementations. -#define REGISTER_GPU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_GPU).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_GPU).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_GPU).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_GPU).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ +#define REGISTER_GPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_GPU).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + 
Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_GPU).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint("T"), \ + Relu6GradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + LeakyReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Elu").Device(DEVICE_GPU).TypeConstraint("T"), \ + EluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("EluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + EluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Selu").Device(DEVICE_GPU).TypeConstraint("T"), \ + SeluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ SeluGradOp) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); @@ -161,30 +186,36 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #ifdef TENSORFLOW_USE_SYCL // Registration of the GPU implementations. 
-#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ +#define REGISTER_SYCL_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + Relu6GradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + LeakyReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + EluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + EluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + SeluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ SeluGradOp) 
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h index e712b02bd7..c55190065c 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -131,6 +131,65 @@ void Relu6GradOp::OperateNoTemplate(OpKernelContext* context, output->flat()); } +template +class LeakyReluOp : public UnaryElementWiseOp> { + public: + explicit LeakyReluOp(OpKernelConstruction* context) + : UnaryElementWiseOp>(context) { + float alpha_tmp; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp)); + alpha_ = T(alpha_tmp); + } + + void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { + functor::LeakyRelu functor; + functor(context->eigen_device(), input.flat(), + alpha_, output->flat()); + } + + private: + T alpha_; +}; + +template +class LeakyReluGradOp + : public BinaryElementWiseOp> { + public: + explicit LeakyReluGradOp(OpKernelConstruction* context) + : BinaryElementWiseOp>(context) { + float alpha_tmp; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp)); + alpha_ = T(alpha_tmp); + } + + void OperateNoTemplate(OpKernelContext* context, const Tensor& g, + const Tensor& a, T alpha, Tensor* output); + + // INPUTS: + // g (gradients): backpropagated gradients + // a (inputs): either the inputs that were passed to LeakyReluOp(), or its + // outputs (using either one yields the same result here). 
+ // OUTPUT: + // gradients to backprop + template + void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a, + Tensor* output) { + OperateNoTemplate(context, g, a, alpha_, output); + } + + private: + T alpha_; +}; + +template +void LeakyReluGradOp::OperateNoTemplate(OpKernelContext* context, + const Tensor& g, const Tensor& a, T alpha, Tensor* output) { + if (!ReluHelpers::ValidateSameSize(context, g, a)) return; + functor::LeakyReluGrad functor; + functor(context->eigen_device(), g.flat(), a.flat(), alpha, + output->flat()); +}; + template class EluOp : public UnaryElementWiseOp> { public: diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 3bc5ba8a50..7f0951451d 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -91,6 +91,37 @@ struct Relu6Grad { } }; + +// Functor used by LeakyReluOp to do the computations. +template +struct LeakyRelu { + // Computes LeakyRelu activation. + // + // features: any shape. + // activations: same shape as "features". + void operator()(const Device& d, typename TTypes::ConstTensor features, + T alpha, typename TTypes::Tensor activations) { + activations.device(d) = features.cwiseMax(features * alpha); + } +}; + +// Functor used by LeakyReluGradOp to do the computations. +template +struct LeakyReluGrad { + // Computes LeakyReluGrad backprops. + // + // gradients: gradients backpropagated to the LeakyRelu op. + // features: either the inputs that were passed to the LeakyRelu or, or its + // outputs (using either one yields the same result here). + // backprops: gradients to backpropagate to the LeakyRelu inputs. 
+ void operator()(const Device& d, typename TTypes::ConstTensor gradients, + typename TTypes::ConstTensor features, T alpha, + typename TTypes::Tensor backprops) { + backprops.device(d) = + (features > static_cast(0)).select(gradients, gradients * alpha); + } +}; + // Functor used by EluOp to do the computations. template struct Elu { diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc index 089ca8ed27..4452f4dcc9 100644 --- a/tensorflow/core/kernels/relu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc @@ -114,14 +114,16 @@ struct ReluGrad { } // namespace functor // Definition of the GPU implementations declared in relu_op.cc. -#define DEFINE_GPU_KERNELS(T) \ - template struct functor::Relu; \ - template struct functor::ReluGrad; \ - template struct functor::Relu6; \ - template struct functor::Relu6Grad; \ - template struct functor::Elu; \ - template struct functor::EluGrad; \ - template struct functor::Selu; \ +#define DEFINE_GPU_KERNELS(T) \ + template struct functor::Relu; \ + template struct functor::ReluGrad; \ + template struct functor::Relu6; \ + template struct functor::Relu6Grad; \ + template struct functor::LeakyRelu; \ + template struct functor::LeakyReluGrad; \ + template struct functor::Elu; \ + template struct functor::EluGrad; \ + template struct functor::Selu; \ template struct functor::SeluGrad; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e0f25fb4ef..023f988f80 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -983,6 +983,21 @@ REGISTER_OP("Relu6Grad") .Attr("T: realnumbertype") .SetShapeFn(shape_inference::MergeBothInputsShapeFn); +REGISTER_OP("LeakyRelu") + .Input("features: T") + .Output("activations: T") + .Attr("alpha: float = 0.2") + .Attr("T: {half, float, double} = DT_FLOAT") + .SetShapeFn(shape_inference::UnchangedShape); + +REGISTER_OP("LeakyReluGrad") + 
.Input("gradients: T") + .Input("features: T") + .Output("backprops: T") + .Attr("alpha: float = 0.2") + .Attr("T: {half, float, double} = DT_FLOAT") + .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + REGISTER_OP("Elu") .Input("features: T") .Output("activations: T") diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index f2595279e0..837e91bc23 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13604,6 +13604,74 @@ op { minimum: 1 } } +op { + name: "LeakyRelu" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "LeakyReluGrad" + input_arg { + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "backprops" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} op { name: "LearnedUnigramCandidateSampler" input_arg { diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 2d54555cd3..9b3b5fd7aa 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { "SoftplusGrad", "Softsign", "ReluGrad", + "LeakyReluGrad", "Conv2D", "DepthwiseConv2dNative", "Dilation2D", @@ -1799,6 +1800,7 @@ bool OpDoesntRequireInput(const string& op_name) { "BiasAdd", "Relu", "Relu6", + "LeakyRelu", "Elu", "Selu", "SparseSoftmaxCrossEntropyWithLogits", diff --git 
a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 25e947f09e..ccb3a231bb 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -252,6 +252,119 @@ class Relu6Test(test.TestCase): self.assertLess(err, 1e-10) +class LeakyReluTest(test.TestCase): + + def _npLeakyRelu(self, np_features, alpha=0.1): + return np.maximum(np_features, alpha * np_features) + + def testNpLeakyRelu(self): + self.assertAllClose( + np.array([[-0.09, 0.7, -0.05, 0.3, -0.01], + [0.1, -0.03, 0.5, -0.07, 0.9]]), + self._npLeakyRelu( + np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7, 0.9] + ]), alpha=0.1)) + + def _testLeakyRelu(self, np_features, alpha, use_gpu=False): + np_leaky_relu = self._npLeakyRelu(np_features, alpha) + with self.test_session(use_gpu=use_gpu): + leaky_relu = nn_ops.leaky_relu(np_features, alpha) + tf_leaky_relu = leaky_relu.eval() + self.assertAllClose(np_leaky_relu, tf_leaky_relu) + self.assertShapeEqual(np_leaky_relu, leaky_relu) + + def testNumbers(self): + for t in [np.int32, np.int64, np.float16, np.float32, np.float64]: + self._testLeakyRelu( + np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), + alpha=0.2, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._testLeakyRelu( + np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), + alpha=0.1, use_gpu=True) + + # The gradient test for ReLU is a bit tricky as the derivative is not well + # defined at around zero and we want to avoid that in terms of input values. 
+ def testGradientFloat32(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], y, [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient err = ", err) + self.assertLess(err, 1e-4) + + def testGradientFloat64(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.2, name="leaky_relu") + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], y, [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient err = ", err) + self.assertLess(err, 1e-10) + + def testGradGradFloat32(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient of gradient err = ", err) + self.assertLess(err, 1e-4) + + def testGradGradFloat64(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, 
-0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient of gradient err = ", err) + self.assertLess(err, 1e-10) + + def testGradientScalar(self): + with self.test_session() as sess: + x = variables.Variable(-100.) + y = nn_ops.leaky_relu(x, 0.05) + loss = y**2 + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.2) + train_op = optimizer.minimize(loss) + sess.run(variables.global_variables_initializer()) + sess.run(train_op) + self.assertAllClose(x.eval(), -99.9) + + class EluTest(test.TestCase): def _npElu(self, np_features): diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index df23ac55ce..c2dd58bdf0 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -390,6 +390,21 @@ def _Relu6GradGrad(op, grad): array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) +@ops.RegisterGradient("LeakyRelu") +def _LeakyReluGrad(op, grad): + x = op.inputs[0] + alpha = op.get_attr("alpha") + return gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha) + + +@ops.RegisterGradient("LeakyReluGrad") +def _LeakyReluGradGrad(op, grad): + x = op.inputs[1] + alpha = op.get_attr("alpha") + return (gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha), + array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) + + @ops.RegisterGradient("Elu") def _EluGrad(op, grad): return gen_nn_ops.elu_grad(grad, op.outputs[0]) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 6fd1273687..31b8f3945d 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,8 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - alpha = ops.convert_to_tensor(alpha, 
dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features, name=name) + return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) def _flatten_outer_dims(logits): -- GitLab From 0845a01256fd3797804f247f76a1655a56c119a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 17 Aug 2018 11:24:21 +0800 Subject: [PATCH 0034/1357] CLN: revise code according to comments --- .../python/estimator/canned/boosted_trees.py | 81 +++++++++++++------ .../estimator/canned/boosted_trees_test.py | 10 +-- 2 files changed, 62 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index ba90b361b3..848698311c 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -201,15 +201,23 @@ def _calculate_num_features(sorted_feature_columns): return num_features -def _generate_feature_name_for_index(sorted_feature_columns): +def _generate_feature_name_mapping(sorted_feature_columns): + """Return a list of feature names for feature ids. + + Args: + sorted_feature_columns: a list/set of tf.feature_column sorted by name. + + Returns: + feature_name_mapping: a list of feature names. 
+ """ names = [] for column in sorted_feature_columns: if isinstance(column, feature_column_lib._IndicatorColumn): # pylint:disable=protected-access categorical_column = column.categorical_column if isinstance(categorical_column, feature_column_lib._VocabularyListCategoricalColumn): # pylint:disable=protected-access - for voc in categorical_column.vocabulary_list: - names.append('{}:{}'.format(column.name, voc)) + for value in categorical_column.vocabulary_list: + names.append('{}:{}'.format(column.name, value)) else: for num in categorical_column._num_buckets: # pylint:disable=protected-access names.append('{}:{}'.format(column.name, num)) @@ -938,7 +946,8 @@ def _create_regression_head(label_dimension, weight_column=None): # pylint: enable=protected-access -def _compute_feature_importance_for_tree(tree, num_features, normalize): +def _compute_feature_importances_per_tree(tree, num_features): + """Computes the importance of each feature in the tree.""" importances = np.zeros(num_features) for node in tree.nodes: @@ -951,21 +960,29 @@ def _compute_feature_importance_for_tree(tree, num_features, normalize): else: raise ValueError('Unexpected split type %s', node_type) - if normalize: - normalizer = np.sum(importances) - if normalizer > 0.0: - # Avoid dividing by zero (e.g., when root is pure) - importances /= normalizer - return importances -def compute_feature_importances(tree_ensemble, - num_features, - normalize=True): - tree_importances = [_compute_feature_importance_for_tree(tree, - num_features, - normalize) +def _compute_feature_importances(tree_ensemble, + num_features, + normalize=True): + """Compute the feature importances. + + The higher the value, the more important the feature. + + Args: + tree_ensemble: TreeEnsemble. + num_features: The total number of feature ids. + normalize: If True, normalize the feature importances. + + Returns: + sorted_feature_idx: A list of feature_id which is sorted + by its feature importance. 
+ feature_importances: A list of corresponding feature importance. + """ + tree_importances = [_compute_feature_importances_per_tree(tree, + num_features, + normalize) for tree in tree_ensemble.trees] tree_importances = np.array(tree_importances) tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) @@ -973,8 +990,8 @@ def compute_feature_importances(tree_ensemble, axis=0) / np.sum(tree_weights) if normalize: normalizer = np.sum(feature_importances) - if normalizer > 0.0: - feature_importances /= normalizer + assert normalizer > 0, 'Trees are all empty or root node only.' + feature_importances /= normalizer sorted_feature_idx = np.argsort(feature_importances)[::-1] return sorted_feature_idx, feature_importances[sorted_feature_idx] @@ -988,18 +1005,34 @@ class _BoostedTrees(estimator.Estimator): self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) - def compute_feature_importances(self, normalize=True): + def experimental_feature_importances(self, normalize=True): + """Compute the feature importances. + + The higher the value, the more important the corresponding feature. + + Args: + normalize: If True, normalize the feature importances. + + Returns: + sorted_feature_names: A list of feature name which is sorted + by its feature importance. + feature_importances: A list of corresponding feature importance. + + Raises: + ValueError: Empty ensemble. 
+ """ tree_ensemble = self._read_tree_ensemble_from_checkpoint() if tree_ensemble: num_features = _calculate_num_features(self._sorted_feature_columns) names_for_idx = np.array( - _generate_feature_name_for_index(self._sorted_feature_columns)) - idx, importances = compute_feature_importances(tree_ensemble, - num_features, - normalize) + _generate_feature_name_mapping(self._sorted_feature_columns)) + idx, importances = _compute_feature_importances(tree_ensemble, + num_features, + normalize) return names_for_idx[idx], importances else: - return [], [] + raise ValueError('Found empty serialized string for TreeEnsemble.' + 'You should only call the method after training.') def _read_tree_ensemble_from_checkpoint(self): with context.graph_mode(): diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 880f0f10ba..8625c7d968 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -165,7 +165,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - sorted_features, importances = est.compute_feature_importances() + sorted_features, importances = est.experimental_feature_importances() self.assertAllEqual([], sorted_features) self.assertAllEqual([], importances) @@ -626,12 +626,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. [0.0, 0.0, 0.0]] # 2nd tree. 
- sorted_features, importances = est.compute_feature_importances(normalize=False) + sorted_features, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, sorted_features) self.assertAllClose(_compute_feature_importances_np(feature_gains, False), importances) - sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, sorted_features1) self.assertAllClose(_compute_feature_importances_np(feature_gains, True), importances1) @@ -706,12 +706,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_gains = [[15.5952005386, 0.0, 0.0, 0.0], # 1st tree. [0.0, 0.0, 0.0, 0.0]] # 2nd tree. - sorted_features, importances = est.compute_feature_importances(normalize=False) + sorted_features, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, sorted_features) self.assertAllClose(_compute_feature_importances_np(feature_gains, False), importances) - sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, sorted_features1) self.assertAllClose(_compute_feature_importances_np(feature_gains, True), importances1) -- GitLab From 196f5478d780b6e069290366fd4b85bb09d8141d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 17 Aug 2018 12:22:13 +0800 Subject: [PATCH 0035/1357] CLN: use CheckpointReader to load TreeEnsemble proto --- .../python/estimator/canned/boosted_trees.py | 60 +++++-------------- 1 file changed, 14 insertions(+), 46 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 848698311c..62757ef588 
100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -24,8 +24,6 @@ import functools import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 -from tensorflow.python.client import session as tf_session -from tensorflow.python.eager import context from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib @@ -43,9 +41,8 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary -from tensorflow.python.training import checkpoint_management +from tensorflow.python.training import checkpoint_utils from tensorflow.python.training import distribute as distribute_lib -from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import estimator_export @@ -61,8 +58,6 @@ _HOLD_FOR_MULTI_DIM_SUPPORT = object() _DUMMY_NUM_BUCKETS = -1 _DUMMY_NODE_ID = -1 -_BOOSTED_TREES_SERIALIZED_PROTO = '_BOOSTED_TREES_SERIALIZED_PROTO' - def _get_transformed_features(features, sorted_feature_columns): """Gets the transformed features from features/feature_columns pair. @@ -770,8 +765,6 @@ def _bt_model_fn( bucketized_features=input_feature_list, logits_dimension=head.logits_dimension) else: - _, serialized_proto = tree_ensemble.serialize() - ops.add_to_collection(_BOOSTED_TREES_SERIALIZED_PROTO, serialized_proto) if is_single_machine: local_tree_ensemble = tree_ensemble ensemble_reload = control_flow_ops.no_op() @@ -980,9 +973,7 @@ def _compute_feature_importances(tree_ensemble, by its feature importance. feature_importances: A list of corresponding feature importance. 
""" - tree_importances = [_compute_feature_importances_per_tree(tree, - num_features, - normalize) + tree_importances = [_compute_feature_importances_per_tree(tree, num_features) for tree in tree_ensemble.trees] tree_importances = np.array(tree_importances) tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) @@ -1021,43 +1012,20 @@ class _BoostedTrees(estimator.Estimator): Raises: ValueError: Empty ensemble. """ - tree_ensemble = self._read_tree_ensemble_from_checkpoint() - if tree_ensemble: - num_features = _calculate_num_features(self._sorted_feature_columns) - names_for_idx = np.array( - _generate_feature_name_mapping(self._sorted_feature_columns)) - idx, importances = _compute_feature_importances(tree_ensemble, - num_features, - normalize) - return names_for_idx[idx], importances - else: + reader = checkpoint_utils.load_checkpoint(self._model_dir) + serialized = reader.get_tensor('boosted_trees:0_serialized') + if not serialized: raise ValueError('Found empty serialized string for TreeEnsemble.' 'You should only call the method after training.') - - def _read_tree_ensemble_from_checkpoint(self): - with context.graph_mode(): - checkpoint_path = checkpoint_management.latest_checkpoint( - self._model_dir) - if not checkpoint_path: - raise ValueError("Couldn't find trained model at %s." % self._model_dir) - - with ops.Graph().as_default() as g: - with tf_session.Session(config=self._session_config) as session: - meta_file = checkpoint_path + '.meta' - graph_saver = saver.import_meta_graph(meta_file) - graph_saver.restore(session, checkpoint_path) - - serialized_proto = ops.get_collection(_BOOSTED_TREES_SERIALIZED_PROTO) - assert len(serialized_proto) == 1 - serialized_proto_string = session.run(serialized_proto[0]) - - if serialized_proto_string: - tree_ensemble = boosted_trees_pb2.TreeEnsemble() - tree_ensemble.ParseFromString(serialized_proto_string) - return tree_ensemble - else: - # serialized_proto_string is empty string before training. 
- return None + ensemble_proto = boosted_trees_pb2.TreeEnsemble() + ensemble_proto.ParseFromString(serialized) + + num_features = _calculate_num_features(self._sorted_feature_columns) + names_for_feature_id = np.array( + _generate_feature_name_mapping(self._sorted_feature_columns)) + sorted_feature_id, importances = _compute_feature_importances( + ensemble_proto, num_features, normalize) + return names_for_feature_id[sorted_feature_id], importances @estimator_export('estimator.BoostedTreesClassifier') -- GitLab From 7ed06809ba3aabf1d93cf726a0b9b6416d80ef85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 17 Aug 2018 14:11:50 +0800 Subject: [PATCH 0036/1357] TST: revise test case --- .../estimator/canned/boosted_trees_test.py | 547 +++++++++++++----- 1 file changed, 410 insertions(+), 137 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 8625c7d968..80d9ac7552 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -17,9 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + +from google.protobuf import text_format import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import model_fn from tensorflow.python.estimator import run_config @@ -31,10 +35,12 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gen_boosted_trees_ops +from tensorflow.python.ops import boosted_trees_ops from tensorflow.python.ops import resources from tensorflow.python.ops import variables from 
tensorflow.python.platform import googletest from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import session_run_hook NUM_FEATURES = 3 @@ -91,17 +97,6 @@ def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None): return _input_fn -def _compute_feature_importances_np(feature_gains, normalize): - if normalize: - feature_gains /= np.sum(feature_gains, axis=1, keepdims=True) - feature_gains = np.nan_to_num(feature_gains) - feature_importances = np.sum(feature_gains, axis=0) / len(feature_gains) - feature_importances /= np.sum(feature_importances) - return np.nan_to_num(feature_importances) - else: - return np.sum(feature_gains, axis=0) / len(feature_gains) - - class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): def setUp(self): @@ -165,9 +160,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - sorted_features, importances = est.experimental_feature_importances() - self.assertAllEqual([], sorted_features) - self.assertAllEqual([], importances) + + with self.assertRaisesRegexp(ValueError, 'empty'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(ValueError, 'empty'): + est.experimental_feature_importances(normalize=True) def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -558,7 +556,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) - def testCalculateFeatureImportances(self): + def testExperimentalFeatureImportancesWithTraining(self): input_fn = _make_train_input_fn(is_classification=True) est = 
boosted_trees.BoostedTreesClassifier( @@ -572,71 +570,358 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): # Train for a few steps, and validate final checkpoint. est.train(input_fn, steps=num_steps) - # TreeEnsemble Proto: - # tree_ensemble: trees { - # nodes { - # bucketized_split { - # feature_id: 2 - # threshold: 2 - # left_id: 1 - # right_id: 2 - # } - # metadata { - # gain: 0.426666676998 - # } - # } - # ...... - # nodes { - # bucketized_split { - # threshold: 1 - # left_id: 5 - # right_id: 6 - # } - # metadata { - # gain: 0.133481562138 - # original_leaf { - # scalar: 0.066666662693 - # } - # } - # } - # ...... - # nodes { - # bucketized_split { - # left_id: 11 - # right_id: 12 - # } - # metadata { - # gain: 0.400360047817 - # original_leaf { - # scalar: 0.0599950700998 - # } - # } - # } - # } - # trees { - # nodes { - # leaf { - # } - # } - # } - # tree_weights: 1.0 - # tree_weights: 1.0 - # ...... feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. - [0.0, 0.0, 0.0]] # 2nd tree. 
- sorted_features, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, sorted_features) - self.assertAllClose(_compute_feature_importances_np(feature_gains, False), - importances) + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.2669208, 0.21333334, 0.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.55579074, 0.44420926, 0.0], importances) + + def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text): + with ops.Graph().as_default(): + with ops.name_scope('boosted_trees') as name: + tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) + tree_ensemble_proto = boosted_trees_pb2.TreeEnsemble() + text_format.Merge(tree_ensemble_text, tree_ensemble_proto) + stamp_token, _ = tree_ensemble.serialize() + restore_op = tree_ensemble.deserialize( + stamp_token, tree_ensemble_proto.SerializeToString()) + + with session.Session() as sess: + resources.initialize_resources(resources.shared_resources()).run() + restore_op.run() + saver = saver_lib.Saver() + save_path = os.path.join(est.model_dir, 'model.ckpt') + saver.save(sess, save_path) + + def testExperimentalCalculateFeatureImportances(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + nodes { + bucketized_split { + feature_id: 0 + left_id: 3 + right_id: 4 + } + metadata { + gain: 3.0 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 5 + right_id: 6 + } + metadata { + gain: 2.0 + } + } + nodes { + bucketized_split { + 
feature_id: 0 + left_id: 7 + right_id: 8 + } + metadata { + gain: 1.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 1.0 + } + } + nodes { + bucketized_split { + feature_id: 2 + left_id: 3 + right_id: 4 + } + metadata { + gain: 1.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([2.5, 1.5, 1.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + + def testExperimentalCalculateFeatureImportancesWithTreeWeights(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 12.5 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 3 + right_id: 4 + } + metadata { + gain: 5.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 5.0 + } + } + } + tree_weights: 0.4 + tree_weights: 0.6 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([5.0, 3.0, 2.0], importances) + + feature_names, importances = 
est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + + def testExperimentalCalculateFeatureImportancesWithEmptyTree(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 3.0 + } + } + nodes { + bucketized_split { + feature_id: 0 + left_id: 3 + right_id: 4 + } + metadata { + gain: 1.0 + } + } + } + trees { + nodes { + leaf { + scalar: 0.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([1.5, 0.5, 0.0], importances) - sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, sorted_features1) - self.assertAllClose(_compute_feature_importances_np(feature_gains, True), - importances1) + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.75, 0.25, 0.0], importances) - def testCalculateFeatureImportancesWithIndicatorColumn(self): + def testExperimentalCalculateFeatureImportancesWithAllEmptyTree(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + leaf { + scalar: 0.0 + } + } + } + trees { + nodes { + leaf { + scalar: 0.0 + } + } + } + tree_weights: 1.0 + 
tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + # Reverse order because feature importances are sorted by np.argsort(f)[::-1] + feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.0, 0.0, 0.0], importances) + + with self.assertRaisesRegexp(AssertionError, 'empty or root node'): + est.experimental_feature_importances(normalize=True) + + def testExperimentalCalculateFeatureImportancesWithMoreTrees(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=5, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 4.0 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 3 + right_id: 4 + } + metadata { + gain: 3.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 1 + left_id: 1 + right_id: 2 + } + metadata { + gain: 1.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 8.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + tree_weights: 1.0 + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + 
self.assertAllClose([2, 1.2, 0.8], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + + def testExperimentalFeatureImportancesWithIndicatorColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) feature_indicator = feature_column.indicator_column(categorical) @@ -645,76 +930,64 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'an_uninformative_feature', dtype=dtypes.float32), BUCKET_BOUNDARIES) - labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32) - # Our categorical feature defines the labels perfectly - input_fn = numpy_io.numpy_input_fn( - x={ - 'an_uninformative_feature': np.array([1, 1, 1, 1, 1]), - 'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']), - }, - y=labels, - batch_size=5, - shuffle=False) - - # Train depth 1 tree. 
est = boosted_trees.BoostedTreesRegressor( feature_columns=[bucketized_col, feature_indicator], n_batches_per_layer=1, - n_trees=1, + n_trees=2, learning_rate=1.0, max_depth=1) - num_steps = 1 - est.train(input_fn, steps=num_steps) + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 5.0 + } + } + nodes { + bucketized_split { + feature_id: 3 + left_id: 3 + right_id: 4 + } + metadata { + gain: 2.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 3.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - # TreeEnsemble Proto: - # trees { - # nodes { - # bucketized_split { - # feature_id: 2 - # left_id: 1 - # right_id: 2 - # } - # metadata { - # gain: 15.5952005386 - # } - # } - # nodes { - # leaf { - # } - # } - # nodes { - # leaf { - # scalar: 5.7000002861 - # } - # } - # } - # trees { - # nodes { - # leaf { - # } - # } - # } - # tree_weights: 1.0 - # tree_weights: 1.0 feature_names_expected = ['categorical_indicator:good', - # Reverse order because feature importances - # are sorted by np.argsort(f)[::-1] + 'an_uninformative_feature_bucketized', 'categorical_indicator:ok', - 'categorical_indicator:bad', - 'an_uninformative_feature_bucketized'] - feature_gains = [[15.5952005386, 0.0, 0.0, 0.0], # 1st tree. - [0.0, 0.0, 0.0, 0.0]] # 2nd tree. 
- - sorted_features, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, sorted_features) - self.assertAllClose(_compute_feature_importances_np(feature_gains, False), - importances) - - sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, sorted_features1) - self.assertAllClose(_compute_feature_importances_np(feature_gains, True), - importances1) + 'categorical_indicator:bad'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([2.5, 1.5, 1.0, 0.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances) class ModelFnTests(test_util.TensorFlowTestCase): -- GitLab From 52d637e604dacd3bff836a27bd991f95966226e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sun, 19 Aug 2018 17:28:12 +0800 Subject: [PATCH 0037/1357] CLN: normalize is False by default --- tensorflow/python/estimator/canned/boosted_trees.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 62757ef588..c59b59b653 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -956,9 +956,7 @@ def _compute_feature_importances_per_tree(tree, num_features): return importances -def _compute_feature_importances(tree_ensemble, - num_features, - normalize=True): +def _compute_feature_importances(tree_ensemble, num_features, normalize): """Compute the feature importances. The higher the value, the more important the feature. 
@@ -972,6 +970,9 @@ def _compute_feature_importances(tree_ensemble, sorted_feature_idx: A list of feature_id which is sorted by its feature importance. feature_importances: A list of corresponding feature importance. + + Raises: + AssertionError: Trees are all empty or root node only when normalizing. """ tree_importances = [_compute_feature_importances_per_tree(tree, num_features) for tree in tree_ensemble.trees] @@ -996,7 +997,7 @@ class _BoostedTrees(estimator.Estimator): self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) - def experimental_feature_importances(self, normalize=True): + def experimental_feature_importances(self, normalize=False): """Compute the feature importances. The higher the value, the more important the corresponding feature. @@ -1005,9 +1006,9 @@ class _BoostedTrees(estimator.Estimator): normalize: If True, normalize the feature importances. Returns: - sorted_feature_names: A list of feature name which is sorted + sorted_feature_names: 1-D array of feature name which is sorted by its feature importance. - feature_importances: A list of corresponding feature importance. + feature_importances: 1-D array of the corresponding feature importance. Raises: ValueError: Empty ensemble. 
-- GitLab From ad18b2dd923329ef598ee12b9bafd7fc63d7013d Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Mon, 20 Aug 2018 00:41:57 +0900 Subject: [PATCH 0038/1357] Implement extract_volume_patches --- .../api_def_ExtractVolumePatches.pbtxt | 49 +++++ tensorflow/core/kernels/BUILD | 14 ++ .../core/kernels/extract_volume_patches_op.cc | 189 ++++++++++++++++++ .../core/kernels/extract_volume_patches_op.h | 58 ++++++ .../extract_volume_patches_op_gpu.cu.cc | 38 ++++ tensorflow/core/ops/array_ops.cc | 103 ++++++++++ tensorflow/python/kernel_tests/BUILD | 12 ++ .../extract_volume_patches_op_test.py | 130 ++++++++++++ 8 files changed, 593 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt create mode 100644 tensorflow/core/kernels/extract_volume_patches_op.cc create mode 100644 tensorflow/core/kernels/extract_volume_patches_op.h create mode 100644 tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc create mode 100644 tensorflow/python/kernel_tests/extract_volume_patches_op_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt new file mode 100644 index 0000000000..3499ade368 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt @@ -0,0 +1,49 @@ +op { + graph_op_name: "ExtractVolumePatches" + in_arg { + name: "images" + description: < +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice 
CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +static inline void ParseAttributeVec5(OpKernelConstruction* context, + const string& attr_name, + std::vector* attr) { + OP_REQUIRES_OK(context, context->GetAttr(attr_name, attr)); + OP_REQUIRES( + context, (*attr)[0] == 1 && (*attr)[4] == 1, + errors::Unimplemented("Only support ", attr_name, " across space.")); + OP_REQUIRES(context, (*attr)[1] >= 1 && (*attr)[2] >= 1 && (*attr)[3] >= 1, + errors::OutOfRange(attr_name, " is out of range.")); +} + +template +class ExtractVolumePatchesOp : public UnaryOp { + public: + explicit ExtractVolumePatchesOp(OpKernelConstruction* context) + : UnaryOp(context) { + ParseAttributeVec5(context, "ksizes", &ksizes_); + ParseAttributeVec5(context, "strides", &strides_); + //ParseAttributeVec5(context, "rates", &rates_); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + } + + void Compute(OpKernelContext* context) override { + // Input tensor is of the following dimensions: + // [ batch, in_planes, in_rows, in_cols, channels ] + const Tensor& input = context->input(0); + OP_REQUIRES(context, input.dims() == 5, + errors::InvalidArgument("input must be 5-dimensional", + input.shape().DebugString())); + + const int batch = input.dim_size(0); + const int in_planes = input.dim_size(1); + const int in_rows = input.dim_size(2); + const int in_cols = input.dim_size(3); + const int depth = input.dim_size(4); + + const int ksize_planes = ksizes_[1]; + const int ksize_rows = ksizes_[2]; + const int ksize_cols = ksizes_[3]; + + const int stride_planes = strides_[1]; + const int stride_rows = strides_[2]; + const int stride_cols = strides_[3]; + + /* + // In order to enable rates, uncomment the following lines and use + // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize + // calls. 
+ + const int rate_planes = rates_[1]; + const int rate_rows = rates_[2]; + const int rate_cols = rates_[3]; + + const int ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1); + const int ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1); + const int ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1); + */ + + int64 out_planes = 0, out_rows = 0, out_cols = 0; + int64 pad_planes = 0, pad_rows = 0, pad_cols = 0; + OP_REQUIRES_OK(context, + GetWindowedOutputSize(in_planes, ksize_planes, stride_planes, + padding_, &out_planes, &pad_planes)); + OP_REQUIRES_OK(context, + GetWindowedOutputSize(in_rows, ksize_rows, stride_rows, + padding_, &out_rows, &pad_rows)); + OP_REQUIRES_OK(context, + GetWindowedOutputSize(in_cols, ksize_cols, stride_cols, + padding_, &out_cols, &pad_cols)); + + const std::vector out_sizes = {batch, out_planes, out_rows, out_cols, + ksize_planes * ksize_rows * ksize_cols * depth}; + TensorShape out_shape(out_sizes); + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); + + // If there is nothing to compute, return. + if (out_shape.num_elements() == 0) { + return; + } + + functor::ExtractVolumePatchesForward()( + context->eigen_device(), input.tensor(), + ksize_planes, ksize_rows, ksize_cols, + stride_planes, stride_rows, stride_cols, + /* rate_planes, rate_rows, rate_cols, */ + BrainPadding2EigenPadding(padding_), output->tensor()); + } + + private: + std::vector ksizes_; + std::vector strides_; + // std::vector rates_; + + Padding padding_; + + TF_DISALLOW_COPY_AND_ASSIGN(ExtractVolumePatchesOp); +}; + +// Registration of the CPU implementations. +#define REGISTER(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ExtractVolumePatches").Device(DEVICE_CPU).TypeConstraint("T"), \ + ExtractVolumePatchesOp); + +TF_CALL_REAL_NUMBER_TYPES(REGISTER); + +#undef REGISTER + +#if GOOGLE_CUDA + +// Forward declarations of the functor specializations for GPU. 
+namespace functor { + +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ExtractVolumePatchesForward::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor input, \ + int patch_planes, int patch_rows, int patch_cols, \ + int stride_planes, int stride_rows, int stride_cols, \ + /* int rate_planes, int rate_rows, int rate_cols, */ \ + const Eigen::PaddingType& padding, \ + typename TTypes::Tensor output); \ + extern template struct ExtractVolumePatchesForward; + +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); + +#undef DECLARE_GPU_SPEC + +} // namespace functor + +// Registration of the GPU implementations. +#define REGISTER(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ExtractVolumePatches").Device(DEVICE_GPU).TypeConstraint("T"), \ + ExtractVolumePatchesOp); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER); + +#undef REGISTER + +#endif // GOOGLE_CUDA + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/extract_volume_patches_op.h b/tensorflow/core/kernels/extract_volume_patches_op.h new file mode 100644 index 0000000000..e2418334ac --- /dev/null +++ b/tensorflow/core/kernels/extract_volume_patches_op.h @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ +#define TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/eigen_volume_patch.h" + +namespace tensorflow { +namespace functor { + +template +struct ExtractVolumePatchesForward { + void operator()(const Device& d, typename TTypes::ConstTensor input, + int patch_planes, int patch_rows, int patch_cols, + int stride_planes, int stride_rows, int stride_cols, + /* int rate_planes, int rate_rows, int rate_cols, */ + const Eigen::PaddingType& padding, + typename TTypes::Tensor output) { + const int64 N = std::max(input.size(), output.size()); + if (N <= std::numeric_limits::max()) { + auto output_32bit = To32Bit(output); + output_32bit.device(d) = + To32Bit(input) + .extract_volume_patches(patch_cols, patch_rows, patch_planes, + stride_cols, stride_rows, stride_planes, + padding) + .reshape(output_32bit.dimensions()); + } else { + output.device(d) = + input + .extract_volume_patches(patch_cols, patch_rows, patch_planes, + stride_cols, stride_rows, stride_planes, + padding) + .reshape(output.dimensions()); + } + } +}; + +} // namespace functor +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ diff --git a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc new file mode 100644 index 0000000000..08b3386c13 --- /dev/null +++ b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc @@ -0,0 +1,38 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/extract_volume_patches_op.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + +#define REGISTER(T) template struct ExtractVolumePatchesForward; + +TF_CALL_GPU_NUMBER_TYPES(REGISTER); + +#undef REGISTER + +} // end namespace functor +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index ef8ad7972c..48d8327a9e 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2549,6 +2549,109 @@ REGISTER_OP("ExtractImagePatches") // -------------------------------------------------------------------------- +// To enable rates, uncomment all lines commented below and use ksize_*_eff +// as the second parameter of all GetWindowedOutputSizeVerbose calls instead +// of ksize_*. 
+REGISTER_OP("ExtractVolumePatches") + .Input("images: T") + .Output("patches: T") + .Attr("ksizes: list(int) >= 5") + .Attr("strides: list(int) >= 5") + /* .Attr("rates: list(int) >= 5") */ + .Attr("T: realnumbertype") + .Attr(GetPaddingAttrString()) + .SetShapeFn([](InferenceContext* c) { + ShapeHandle input_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 5, &input_shape)); + + std::vector ksizes; + TF_RETURN_IF_ERROR(c->GetAttr("ksizes", &ksizes)); + if (ksizes.size() != 5) { + return errors::InvalidArgument( + "ExtractVolumePatches requires the ksizes attribute to contain 5 " + "values, but got: ", + ksizes.size()); + } + + std::vector strides; + TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides)); + if (strides.size() != 5) { + return errors::InvalidArgument( + "ExtractVolumePatches requires the stride attribute to contain 5 " + "values, but got: ", + strides.size()); + } + + /* + std::vector rates; + TF_RETURN_IF_ERROR(c->GetAttr("rates", &rates)); + if (rates.size() != 5) { + return errors::InvalidArgument( + "ExtractVolumePatches requires the rates attribute to contain 5 " + "values, but got: ", + rates.size()); + } + */ + + int32 ksize_planes = ksizes[1]; + int32 ksize_rows = ksizes[2]; + int32 ksize_cols = ksizes[3]; + + int32 stride_planes = strides[1]; + int32 stride_rows = strides[2]; + int32 stride_cols = strides[3]; + + /* + int32 rate_planes = rates[1]; + int32 rate_rows = rates[2]; + int32 rate_cols = rates[3]; + + int32 ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1); + int32 ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1); + int32 ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1); + */ + + DimensionHandle batch_size_dim = c->Dim(input_shape, 0); + DimensionHandle in_planes_dim = c->Dim(input_shape, 1); + DimensionHandle in_rows_dim = c->Dim(input_shape, 2); + DimensionHandle in_cols_dim = c->Dim(input_shape, 3); + DimensionHandle output_depth_dim; + TF_RETURN_IF_ERROR(c->Multiply( + 
c->Dim(input_shape, 4), ksize_planes * ksize_rows * ksize_cols, &output_depth_dim)); + + if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) || !c->ValueKnown(in_cols_dim)) { + ShapeHandle output_shape = + c->MakeShape({batch_size_dim, InferenceContext::kUnknownDim, + InferenceContext::kUnknownDim, output_depth_dim}); + c->set_output(0, output_shape); + return Status::OK(); + } + auto in_planes = c->Value(in_planes_dim); + auto in_rows = c->Value(in_rows_dim); + auto in_cols = c->Value(in_cols_dim); + + Padding padding; + TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding)); + + int64 output_planes, output_rows, output_cols; + int64 padding_before, padding_after; + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( + in_planes, ksize_planes, stride_planes, padding, &output_planes, + &padding_before, &padding_after)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( + in_rows, ksize_rows, stride_rows, padding, &output_rows, + &padding_before, &padding_after)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( + in_cols, ksize_cols, stride_cols, padding, &output_cols, + &padding_before, &padding_after)); + ShapeHandle output_shape = c->MakeShape( + {batch_size_dim, output_planes, output_rows, output_cols, output_depth_dim}); + c->set_output(0, output_shape); + return Status::OK(); + }); + +// -------------------------------------------------------------------------- + REGISTER_OP("Bitcast") .Input("input: T") .Output("output: type") diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 2451dc7257..bb896085f2 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1582,6 +1582,18 @@ cuda_py_test( ], ) +cuda_py_test( + name = "extract_volume_patches_op_test", + size = "small", + srcs = ["extract_volume_patches_op_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + 
"//tensorflow/python:framework_for_generated_wrappers", + ], +) + cuda_py_test( name = "functional_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py new file mode 100644 index 0000000000..215474f6db --- /dev/null +++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py @@ -0,0 +1,130 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for ExtractVolumePatches op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + +class ExtractVolumePatches(test.TestCase): + """Functional tests for ExtractVolumePatches op.""" + + def _VerifyValues(self, image, ksizes, strides, padding, patches): + """Tests input-output pairs for the ExtractVolumePatches op. + + Args: + image: Input tensor with shape: + [batch, in_planes, in_rows, in_cols, depth]. + ksizes: Patch size specified as: [ksize_planes, ksize_rows, ksize_cols]. + strides: Output strides, specified as: + [stride_planes, stride_rows, stride_cols]. + padding: Padding type. + patches: Expected output. 
+ + Note: + rates are not supported as of now. + """ + ksizes = [1] + ksizes + [1] + strides = [1] + strides + [1] + + with self.test_session(use_gpu=True): + out_tensor = array_ops.extract_volume_patches( + constant_op.constant(image), + ksizes=ksizes, + strides=strides, + padding=padding, + name="im2col_3d") + self.assertAllClose(patches, out_tensor.eval()) + + def testKsize1x1x1Stride1x1x1(self): + """Verifies that for 1x1x1 kernel the output equals the input.""" + image = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6]) + 1 + patches = image + for padding in ["VALID", "SAME"]: + self._VerifyValues( + image, + ksizes=[1, 1, 1], + strides=[1, 1, 1], + padding=padding, + patches=patches) + + def testKsize1x1x1Stride2x3x4(self): + """Test for 1x1x1 kernel and strides.""" + image = np.arange(6 * 2 * 4 * 5 * 3).reshape([6, 2, 4, 5, 3]) + 1 + patches = image[:, ::2, ::3, ::4, :] + for padding in ["VALID", "SAME"]: + self._VerifyValues( + image, + ksizes=[1, 1, 1], + strides=[2, 3, 4], + padding=padding, + patches=patches) + + def testKsize1x1x2Stride2x2x3(self): + """Test for 1x1x2 kernel and strides.""" + image = np.arange(45).reshape([1, 3, 3, 5, 1]) + 1 + patches = np.array([[[[[ 1, 2], + [ 4, 5]], + [[11, 12], + [14, 15]]], + [[[31, 32], + [34, 35]], + [[41, 42], + [44, 45]]]]]) + for padding in ["VALID", "SAME"]: + self._VerifyValues( + image, + ksizes=[1, 1, 2], + strides=[2, 2, 3], + padding=padding, + patches=patches) + + def testKsize2x2x2Stride1x1x1Valid(self): + """Test for 2x2x2 kernel with VALID padding.""" + image = np.arange(8).reshape([1, 2, 2, 2, 1]) + 1 + patches = np.array([[[[[1, 2, 3, 4, 5, 6, 7, 8]]]]]) + self._VerifyValues( + image, + ksizes=[2, 2, 2], + strides=[1, 1, 1], + padding="VALID", + patches=patches) + + def testKsize2x2x2Stride1x1x1Same(self): + """Test for 2x2x2 kernel with SAME padding.""" + image = np.arange(8).reshape([1, 2, 2, 2, 1]) + 1 + patches = np.array([[[[[1, 2, 3, 4, 5, 6, 7, 8], + [2, 0, 4, 0, 6, 0, 8, 0]], + [[3, 
4, 0, 0, 7, 8, 0, 0], + [4, 0, 0, 0, 8, 0, 0, 0]]], + [[[5, 6, 7, 8, 0, 0, 0, 0], + [6, 0, 8, 0, 0, 0, 0, 0]], + [[7, 8, 0, 0, 0, 0, 0, 0], + [8, 0, 0, 0, 0, 0, 0, 0]]]]]) + self._VerifyValues( + image, + ksizes=[2, 2, 2], + strides=[1, 1, 1], + padding="SAME", + patches=patches) + +if __name__ == "__main__": + test.main() -- GitLab From 5630efcca924563b549a788b4b5ec93fea91e559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 21 Aug 2018 13:06:02 +0800 Subject: [PATCH 0039/1357] CLN: revise according to comments --- .../python/estimator/boosted_trees.py | 5 ++- .../python/estimator/canned/boosted_trees.py | 19 ++++++---- .../estimator/canned/boosted_trees_test.py | 37 ++++++++++++++----- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index 7ed77bcce6..e6bdc97fe5 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -33,7 +33,7 @@ def _validate_input_fn_and_repeat_dataset(train_input_fn): return _input_fn -class _BoostedTreesEstimator(estimator.Estimator): +class _BoostedTreesEstimator(canned_boosted_trees._BoostedTrees): # pylint: disable=protected-access """An Estimator for Tensorflow Boosted Trees models.""" def __init__(self, @@ -115,7 +115,8 @@ class _BoostedTreesEstimator(estimator.Estimator): config=config) super(_BoostedTreesEstimator, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) + model_fn=_model_fn, model_dir=model_dir, config=config, + feature_columns=feature_columns) # pylint:enable=protected-access diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index c59b59b653..d051399b52 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ 
b/tensorflow/python/estimator/canned/boosted_trees.py @@ -203,7 +203,7 @@ def _generate_feature_name_mapping(sorted_feature_columns): sorted_feature_columns: a list/set of tf.feature_column sorted by name. Returns: - feature_name_mapping: a list of feature name. + feature_name_mapping: a list of feature names indexed by the feature ids. """ names = [] for column in sorted_feature_columns: @@ -962,17 +962,19 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): The higher the value, the more important the feature. Args: - tree_ensemble: TreeEnsemble. + tree_ensemble: a trained tree ensemble, instance of proto + boosted_trees.TreeEnsemble. num_features: The total number of feature ids. normalize: If True, normalize the feature importances. Returns: sorted_feature_idx: A list of feature_id which is sorted by its feature importance. - feature_importances: A list of corresponding feature importance. + feature_importances: A list of corresponding feature importances. Raises: - AssertionError: Trees are all empty or root node only when normalizing. + AssertionError: If normalize = True and normalization is not possible + (e.g. ensemble is empty or trees contain only a root node). """ tree_importances = [_compute_feature_importances_per_tree(tree, num_features) for tree in tree_ensemble.trees] @@ -982,7 +984,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): axis=0) / np.sum(tree_weights) if normalize: normalizer = np.sum(feature_importances) - assert normalizer > 0, 'Trees are all empty or root node only.' + assert normalizer > 0, 'Trees are all empty or contains only a root node.' 
feature_importances /= normalizer sorted_feature_idx = np.argsort(feature_importances)[::-1] @@ -990,15 +992,17 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): class _BoostedTrees(estimator.Estimator): + """Base class for boosted trees estimators.""" def __init__(self, model_fn, model_dir, config, feature_columns): super(_BoostedTrees, self).__init__( model_fn=model_fn, model_dir=model_dir, config=config) self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) + self._num_features = _calculate_num_features(self._sorted_feature_columns) def experimental_feature_importances(self, normalize=False): - """Compute the feature importances. + """Computes gain-based feature importances. The higher the value, the more important the corresponding feature. @@ -1021,11 +1025,10 @@ class _BoostedTrees(estimator.Estimator): ensemble_proto = boosted_trees_pb2.TreeEnsemble() ensemble_proto.ParseFromString(serialized) - num_features = _calculate_num_features(self._sorted_feature_columns) names_for_feature_id = np.array( _generate_feature_name_mapping(self._sorted_feature_columns)) sorted_feature_id, importances = _compute_feature_importances( - ensemble_proto, num_features, normalize) + ensemble_proto, self._num_features, normalize) return names_for_feature_id[sorted_feature_id], importances diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 80d9ac7552..c764831279 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -161,12 +161,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - with self.assertRaisesRegexp(ValueError, 'empty'): - est.experimental_feature_importances(normalize=False) - - with self.assertRaisesRegexp(ValueError, 'empty'): - 
est.experimental_feature_importances(normalize=True) - def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -556,7 +550,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) - def testExperimentalFeatureImportancesWithTraining(self): + def testExperimentalFeatureImportancesWithTrainedEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -580,6 +574,31 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.55579074, 0.44420926, 0.0], importances) + def testFeatureImportancesOnEmtpyEnsemble(self): + input_fn = _make_train_input_fn(is_classification=True) + + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + class BailOutWithoutTraining(session_run_hook.SessionRunHook): + + def before_run(self, run_context): + raise StopIteration('to bail out.') + + # The step-0 checkpoint will have only an empty ensemble. + est.train(input_fn, + steps=100, # must stop at 0 anyway. 
+ hooks=[BailOutWithoutTraining()]) + + with self.assertRaisesRegexp(ValueError, 'empty serialized string'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(ValueError, 'empty serialized string'): + est.experimental_feature_importances(normalize=True) + def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text): with ops.Graph().as_default(): with ops.name_scope('boosted_trees') as name: @@ -823,7 +842,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.0, 0.0, 0.0], importances) - with self.assertRaisesRegexp(AssertionError, 'empty or root node'): + with self.assertRaisesRegexp(AssertionError, 'empty or contains'): est.experimental_feature_importances(normalize=True) def testExperimentalCalculateFeatureImportancesWithMoreTrees(self): @@ -921,7 +940,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testExperimentalFeatureImportancesWithIndicatorColumn(self): + def TestFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) feature_indicator = feature_column.indicator_column(categorical) -- GitLab From e39bbe4947801c10c41e96fe4cbbb77817136e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 21 Aug 2018 13:52:38 +0800 Subject: [PATCH 0040/1357] TST: add test case for negative feature importances --- .../python/estimator/canned/boosted_trees.py | 5 +- .../estimator/canned/boosted_trees_test.py | 52 ++++++++++++++++--- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 
d051399b52..85bc934a0e 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -973,7 +973,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): feature_importances: A list of corresponding feature importances. Raises: - AssertionError: If normalize = True and normalization is not possible + AssertionError: If feature importances contain negative value. + Or if normalize = True and normalization is not possible (e.g. ensemble is empty or trees contain only a root node). """ tree_importances = [_compute_feature_importances_per_tree(tree, num_features) @@ -982,6 +983,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) feature_importances = np.sum(tree_importances * tree_weights, axis=0) / np.sum(tree_weights) + assert np.all(feature_importances >= 0), ('feature_importances ' + 'must be non-negative.') if normalize: normalizer = np.sum(feature_importances) assert normalizer > 0, 'Trees are all empty or contains only a root node.' 
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index c764831279..9362b927e2 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -550,7 +550,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) - def testExperimentalFeatureImportancesWithTrainedEnsemble(self): + def testFeatureImportancesWithTrainedEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -616,7 +616,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): save_path = os.path.join(est.model_dir, 'model.ckpt') saver.save(sess, save_path) - def testExperimentalCalculateFeatureImportances(self): + def testFeatureImportances(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -702,7 +702,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testExperimentalCalculateFeatureImportancesWithTreeWeights(self): + def testFeatureImportancesWithTreeWeights(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -758,7 +758,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testExperimentalCalculateFeatureImportancesWithEmptyTree(self): + def testFeatureImportancesWithEmptyTree(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -809,7 +809,7 @@ class 
BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.75, 0.25, 0.0], importances) - def testExperimentalCalculateFeatureImportancesWithAllEmptyTree(self): + def testFeatureImportancesWithAllEmptyTree(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -845,7 +845,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(AssertionError, 'empty or contains'): est.experimental_feature_importances(normalize=True) - def testExperimentalCalculateFeatureImportancesWithMoreTrees(self): + def testFeatureImportancesWithMoreTrees(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -1008,6 +1008,46 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances) + def testNegativeFeatureImportances(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 1 + left_id: 1 + right_id: 2 + } + metadata { + gain: -5.0 + } + } + nodes { + bucketized_split { + feature_id: 2 + left_id: 3 + right_id: 4 + } + metadata { + gain: 2.0 + } + } + } + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=True) + class ModelFnTests(test_util.TensorFlowTestCase): """Tests bt_model_fn including unexposed internal functionalities.""" -- GitLab From 
88d722c13418fd177c3e03e954307fdfa86a474b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 21 Aug 2018 14:07:55 +0800 Subject: [PATCH 0041/1357] ENH: don't divide by the sum of tree weights --- .../python/estimator/canned/boosted_trees.py | 3 +-- .../python/estimator/canned/boosted_trees_test.py | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 85bc934a0e..2f5e46b559 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -981,8 +981,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): for tree in tree_ensemble.trees] tree_importances = np.array(tree_importances) tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) - feature_importances = np.sum(tree_importances * tree_weights, - axis=0) / np.sum(tree_weights) + feature_importances = np.sum(tree_importances * tree_weights, axis=0) assert np.all(feature_importances >= 0), ('feature_importances ' 'must be non-negative.') if normalize: diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 9362b927e2..54ad052915 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -556,7 +556,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, - n_trees=1, + n_trees=2, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. 
@@ -568,11 +568,11 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.2669208, 0.21333334, 0.0], importances) + self.assertAllClose([0.833933, 0.606342, 0.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.55579074, 0.44420926, 0.0], importances) + self.assertAllClose([0.579010, 0.420990, 0.0], importances) def testFeatureImportancesOnEmtpyEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) @@ -696,7 +696,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([2.5, 1.5, 1.0], importances) + self.assertAllClose([5.0, 3.0, 2.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) @@ -803,7 +803,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([1.5, 0.5, 0.0], importances) + self.assertAllClose([3.0, 1.0, 0.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) @@ -934,7 +934,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] 
feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([2, 1.2, 0.8], importances) + self.assertAllClose([10, 6.0, 4.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) @@ -1002,7 +1002,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'categorical_indicator:bad'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([2.5, 1.5, 1.0, 0.0], importances) + self.assertAllClose([5.0, 3.0, 2.0, 0.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) -- GitLab From 73c8cbb413029cf3e540e99b883ae89f4b08fc11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 21 Aug 2018 14:18:27 +0800 Subject: [PATCH 0042/1357] TST: add test case for full tree with leaves --- .../estimator/canned/boosted_trees_test.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 54ad052915..13e1d224bc 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -845,6 +845,117 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(AssertionError, 'empty or contains'): est.experimental_feature_importances(normalize=True) + def testFeatureImportancesWithFullTrees(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + 
bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + nodes { + bucketized_split { + feature_id: 0 + left_id: 3 + right_id: 4 + } + metadata { + gain: 3.0 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 5 + right_id: 6 + } + metadata { + gain: 2.0 + } + } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } + nodes { + leaf { + scalar: 0.0 + } + } + nodes { + leaf { + scalar: 3.34 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + nodes { + leaf { + scalar: -0.88 + } + } + nodes { + bucketized_split { + feature_id: 2 + left_id: 3 + right_id: 4 + } + metadata { + gain: 1.0 + } + } + nodes { + leaf { + scalar: 1.88 + } + } + nodes { + leaf { + scalar: -2.88 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([5.0, 3.0, 2.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + def testFeatureImportancesWithMoreTrees(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, -- GitLab From 4979d7314dd1f1788751781b2dfbfb9e47c8e20e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 22 Aug 2018 11:34:50 +0800 Subject: [PATCH 0043/1357] CLN: revise codes --- .../python/estimator/canned/boosted_trees.py | 18 +- .../estimator/canned/boosted_trees_test.py | 338 +++++------------- 2 files changed, 101 insertions(+), 255 
deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 2f5e46b559..b1d5d60fb0 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -957,7 +957,7 @@ def _compute_feature_importances_per_tree(tree, num_features): def _compute_feature_importances(tree_ensemble, num_features, normalize): - """Compute the feature importances. + """Computes gain-based feature importances. The higher the value, the more important the feature. @@ -986,7 +986,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): 'must be non-negative.') if normalize: normalizer = np.sum(feature_importances) - assert normalizer > 0, 'Trees are all empty or contains only a root node.' + assert normalizer > 0, 'Trees are all empty or contain only a root node.' feature_importances /= normalizer sorted_feature_idx = np.argsort(feature_importances)[::-1] @@ -1000,8 +1000,11 @@ class _BoostedTrees(estimator.Estimator): super(_BoostedTrees, self).__init__( model_fn=model_fn, model_dir=model_dir, config=config) - self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) + self._sorted_feature_columns = sorted(feature_columns, + key=lambda tc: tc.name) self._num_features = _calculate_num_features(self._sorted_feature_columns) + self._names_for_feature_id = np.array( + _generate_feature_name_mapping(self._sorted_feature_columns)) def experimental_feature_importances(self, normalize=False): """Computes gain-based feature importances. @@ -1017,21 +1020,20 @@ class _BoostedTrees(estimator.Estimator): feature_importances: 1-D array of the corresponding feature importance. Raises: - ValueError: Empty ensemble. + ValueError: When attempting to normalize on an empty ensemble + or an ensemble of trees which have no splits. 
""" reader = checkpoint_utils.load_checkpoint(self._model_dir) serialized = reader.get_tensor('boosted_trees:0_serialized') if not serialized: raise ValueError('Found empty serialized string for TreeEnsemble.' - 'You should only call the method after training.') + 'You should only call this method after training.') ensemble_proto = boosted_trees_pb2.TreeEnsemble() ensemble_proto.ParseFromString(serialized) - names_for_feature_id = np.array( - _generate_feature_name_mapping(self._sorted_feature_columns)) sorted_feature_id, importances = _compute_feature_importances( ensemble_proto, self._num_features, normalize) - return names_for_feature_id[sorted_feature_id], importances + return self._names_for_feature_id[sorted_feature_id], importances @estimator_export('estimator.BoostedTreesClassifier') diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 13e1d224bc..24d3a3501e 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -574,7 +574,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.579010, 0.420990, 0.0], importances) - def testFeatureImportancesOnEmtpyEnsemble(self): + def testFeatureImportancesOnEmptyEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -616,7 +616,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): save_path = os.path.join(est.model_dir, 'model.ckpt') saver.save(sess, save_path) - def testFeatureImportances(self): + def testFeatureImportancesOnNonEmptyEnsemble(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -656,130 +656,60 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): } } nodes { - bucketized_split { - feature_id: 0 - 
left_id: 7 - right_id: 8 - } - metadata { - gain: 1.0 + leaf { + scalar: -0.34 } } - } - trees { nodes { - bucketized_split { - feature_id: 0 - left_id: 1 - right_id: 2 - } - metadata { - gain: 1.0 + leaf { + scalar: 1.34 } } nodes { - bucketized_split { - feature_id: 2 - left_id: 3 - right_id: 4 - } - metadata { - gain: 1.0 + leaf { + scalar: 0.0 } } - } - tree_weights: 1.0 - tree_weights: 1.0 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([5.0, 3.0, 2.0], importances) - - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2], importances) - - def testFeatureImportancesWithTreeWeights(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - max_depth=5) - - tree_ensemble_text = """ - trees { nodes { bucketized_split { feature_id: 0 - left_id: 1 - right_id: 2 + left_id: 7 + right_id: 8 } metadata { - gain: 12.5 + gain: 1.0 } } nodes { - bucketized_split { - feature_id: 1 - left_id: 3 - right_id: 4 + leaf { + scalar: 3.34 } - metadata { - gain: 5.0 + } + nodes { + leaf { + scalar: 1.34 } } } trees { nodes { bucketized_split { - feature_id: 2 + feature_id: 0 left_id: 1 right_id: 2 } metadata { - gain: 5.0 + gain: 1.0 } } - } - tree_weights: 0.4 - tree_weights: 0.6 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, feature_names) - 
self.assertAllClose([5.0, 3.0, 2.0], importances) - - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2], importances) - - def testFeatureImportancesWithEmptyTree(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - max_depth=5) - - tree_ensemble_text = """ - trees { nodes { - bucketized_split { - feature_id: 2 - left_id: 1 - right_id: 2 - } - metadata { - gain: 3.0 + leaf { + scalar: 3.34 } } nodes { bucketized_split { - feature_id: 0 + feature_id: 2 left_id: 3 right_id: 4 } @@ -787,47 +717,14 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): gain: 1.0 } } - } - trees { - nodes { - leaf { - scalar: 0.0 - } - } - } - tree_weights: 1.0 - tree_weights: 1.0 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([3.0, 1.0, 0.0], importances) - - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.75, 0.25, 0.0], importances) - - def testFeatureImportancesWithAllEmptyTree(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - max_depth=5) - - tree_ensemble_text = """ - trees { nodes { leaf { - scalar: 0.0 + scalar: 3.34 } } - } - trees { nodes { leaf { - scalar: 0.0 + scalar: 1.34 } } } @@ -836,52 +733,42 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - # Reverse order because 
feature importances are sorted by np.argsort(f)[::-1] - feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.0, 0.0, 0.0], importances) + self.assertAllClose([5.0, 3.0, 2.0], importances) - with self.assertRaisesRegexp(AssertionError, 'empty or contains'): - est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) - def testFeatureImportancesWithFullTrees(self): + def testFeatureImportancesWithTreeWeights(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, - n_trees=2, + n_trees=3, max_depth=5) tree_ensemble_text = """ trees { nodes { bucketized_split { - feature_id: 2 + feature_id: 0 left_id: 1 right_id: 2 } metadata { - gain: 2.0 + gain: 12.5 } } nodes { bucketized_split { - feature_id: 0 + feature_id: 1 left_id: 3 right_id: 4 } metadata { - gain: 3.0 - } - } - nodes { - bucketized_split { - feature_id: 1 - left_id: 5 - right_id: 6 - } - metadata { - gain: 2.0 + gain: 5.0 } } nodes { @@ -899,50 +786,38 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): scalar: 0.0 } } - nodes { - leaf { - scalar: 3.34 - } - } } trees { nodes { bucketized_split { - feature_id: 0 + feature_id: 2 left_id: 1 right_id: 2 } metadata { - gain: 2.0 + gain: 5.0 } } nodes { leaf { - scalar: -0.88 - } - } - nodes { - bucketized_split { - feature_id: 2 - left_id: 3 - right_id: 4 - } - metadata { - gain: 1.0 + scalar: -0.34 } } nodes { leaf { - scalar: 1.88 + scalar: 1.34 } } + } + trees { nodes { leaf { - scalar: -2.88 + scalar: 0.0 } } } - tree_weights: 
1.0 + tree_weights: 0.4 + tree_weights: 0.6 tree_weights: 1.0 """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) @@ -956,100 +831,42 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testFeatureImportancesWithMoreTrees(self): + def testFeatureImportancesWithAllEmptyTree(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, - n_trees=5, + n_trees=2, max_depth=5) tree_ensemble_text = """ trees { nodes { - bucketized_split { - feature_id: 2 - left_id: 1 - right_id: 2 - } - metadata { - gain: 4.0 - } - } - nodes { - bucketized_split { - feature_id: 1 - left_id: 3 - right_id: 4 - } - metadata { - gain: 3.0 - } - } - } - trees { - nodes { - bucketized_split { - feature_id: 2 - left_id: 1 - right_id: 2 - } - metadata { - gain: 2.0 - } - } - } - trees { - nodes { - bucketized_split { - feature_id: 1 - left_id: 1 - right_id: 2 - } - metadata { - gain: 1.0 - } - } - } - trees { - nodes { - bucketized_split { - feature_id: 0 - left_id: 1 - right_id: 2 - } - metadata { - gain: 8.0 + leaf { + scalar: 0.0 } } } trees { nodes { - bucketized_split { - feature_id: 0 - left_id: 1 - right_id: 2 - } - metadata { - gain: 2.0 + leaf { + scalar: 0.0 } } } tree_weights: 1.0 tree_weights: 1.0 - tree_weights: 1.0 - tree_weights: 1.0 - tree_weights: 1.0 """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + # Reverse order because feature importances are sorted by np.argsort(f)[::-1] + feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([10, 6.0, 4.0], importances) + 
self.assertAllClose([0.0, 0.0, 0.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2], importances) + with self.assertRaisesRegexp(AssertionError, + 'all empty or contain only a root node'): + est.experimental_feature_importances(normalize=True) def TestFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( @@ -1089,6 +906,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): gain: 2.0 } } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } + nodes { + leaf { + scalar: 0.0 + } + } } trees { nodes { @@ -1101,6 +933,16 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): gain: 3.0 } } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } } tree_weights: 1.0 tree_weights: 1.0 @@ -1126,6 +968,8 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): n_trees=1, max_depth=5) + # In order to generate a negative feature importances, + # We assign an invalid value -1 to tree_weights here. 
tree_ensemble_text = """ trees { nodes { @@ -1135,21 +979,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): right_id: 2 } metadata { - gain: -5.0 + gain: 5.0 } } nodes { - bucketized_split { - feature_id: 2 - left_id: 3 - right_id: 4 + leaf { + scalar: -0.34 } - metadata { - gain: 2.0 + } + nodes { + leaf { + scalar: 1.34 } } } - tree_weights: 1.0 + tree_weights: -1.0 """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) -- GitLab From 56ea7fc45559f372315b2aedd0a2df15113f5f93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 22 Aug 2018 17:51:17 +0800 Subject: [PATCH 0044/1357] ENH: div_no_nan supports to treate negative as zero --- tensorflow/python/ops/math_ops.py | 5 ++++- tensorflow/python/ops/math_ops_test.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 67ea534639..a693b1ebac 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1039,13 +1039,14 @@ def div(x, y, name=None): @tf_export("div_no_nan") -def div_no_nan(x, y, name=None): +def div_no_nan(x, y, name=None, negative_to_zero=False): """Computes an unsafe divide which returns 0 if the y is zero. Args: x: A `Tensor`. Must be one of the following types: `float32`, `float64`. y: A `Tensor` whose dtype is compatible with `x`. name: A name for the operation (optional). + negative_to_zero: If `True`, negative is treated as zero in denominator. Returns: The element-wise value of the x divided by y. 
""" @@ -1058,6 +1059,8 @@ def div_no_nan(x, y, name=None): if x_dtype != y_dtype: raise TypeError("x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype)) + if negative_to_zero: + y = gen_math_ops.maximum(y, 0, name='negative_to_zero') return gen_math_ops.div_no_nan(x, y, name=name) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 6bd41020c5..6e1e5f37c8 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -487,6 +487,19 @@ class DivNoNanTest(test_util.TensorFlowTestCase): tf_result = math_ops.div_no_nan(nums, divs).eval() self.assertAllEqual(tf_result, np_result) + def testNegativeToZero(self): + for dtype in [np.float32, np.float64]: + nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1) + divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24) + + np_result = np.true_divide(nums, divs) + np_result[:, divs[0] <= 0] = 0 + + with self.cached_session(): + tf_result = math_ops.div_no_nan(nums, divs, + negative_to_zero=True).eval() + self.assertAllEqual(tf_result, np_result) + if __name__ == "__main__": googletest.main() -- GitLab From c05bb4efcaf53d4cbc315ef6d12de822f2557a13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 22 Aug 2018 18:13:37 +0800 Subject: [PATCH 0045/1357] CLN: replace safe_div method by div_no_nan --- .../contrib/losses/python/losses/loss_ops.py | 40 ++++--------- .../contrib/metrics/python/ops/metric_ops.py | 46 ++++++--------- tensorflow/contrib/rate/rate.py | 11 +--- .../python/keras/engine/training_utils.py | 3 +- tensorflow/python/keras/metrics.py | 19 +------ tensorflow/python/kernel_tests/losses_test.py | 14 ----- tensorflow/python/ops/losses/losses_impl.py | 40 ++++--------- tensorflow/python/ops/metrics_impl.py | 57 ++++++++----------- 8 files changed, 67 insertions(+), 163 deletions(-) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py 
b/tensorflow/contrib/losses/python/losses/loss_ops.py index 651de4e2f4..29f7953c3b 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -66,32 +66,6 @@ def _scale_losses(losses, weights): return math_ops.reduce_sum(reduced_losses) -def _safe_div(numerator, denominator, name="value"): - """Computes a safe divide which returns 0 if the denominator is zero. - - Note that the function contains an additional conditional check that is - necessary for avoiding situations where the loss is zero causing NaNs to - creep into the gradient computation. - - Args: - numerator: An arbitrary `Tensor`. - denominator: A `Tensor` whose shape matches `numerator` and whose values are - assumed to be non-negative. - name: An optional name for the returned op. - - Returns: - The element-wise value of the numerator divided by the denominator. - """ - return array_ops.where( - math_ops.greater(denominator, 0), - math_ops.div(numerator, - array_ops.where( - math_ops.equal(denominator, 0), - array_ops.ones_like(denominator), denominator)), - array_ops.zeros_like(numerator), - name=name) - - def _safe_mean(losses, num_present): """Computes a safe mean of the losses. @@ -104,7 +78,8 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return _safe_div(total_loss, num_present) + return math_ops.div_no_nan(total_loss, num_present, + negative_to_zero=True, name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -609,11 +584,16 @@ def mean_pairwise_squared_error(predictions, math_ops.square(diffs), reduction_indices=reduction_indices) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch) + term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, + num_present_per_batch, + negative_to_zero=True, + name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) - term2 = 2.0 * _safe_div( - math_ops.square(sum_diff), math_ops.square(num_present_per_batch)) + term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff), + math_ops.square(num_present_per_batch), + negative_to_zero=True, + name="value") loss = _scale_losses(term1 - term2, weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index a328670526..d972e7da53 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -45,24 +45,6 @@ from tensorflow.python.util.deprecation import deprecated _EPSILON = 1e-7 -def _safe_div(numerator, denominator, name): - """Divides two values, returning 0 if the denominator is <= 0. - - Args: - numerator: A real `Tensor`. - denominator: A real `Tensor`, with dtype matching `numerator`. - name: Name for the returned op. - - Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` - """ - return array_ops.where( - math_ops.greater(denominator, 0), - math_ops.truediv(numerator, denominator), - 0, - name=name) - - @deprecated(None, 'Please switch to tf.metrics.true_positives. 
Note that the ' 'order of the labels and predictions arguments has been switched.') def streaming_true_positives(predictions, @@ -3205,22 +3187,28 @@ def streaming_covariance(predictions, # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation - batch_mean_prediction = _safe_div( + batch_mean_prediction = math_ops.div_no_nan( math_ops.reduce_sum(weighted_predictions), batch_count, - 'batch_mean_prediction') - delta_mean_prediction = _safe_div( + negative_to_zero=True, + name='batch_mean_prediction') + delta_mean_prediction = math_ops.div_no_nan( (batch_mean_prediction - mean_prediction) * batch_count, update_count, - 'delta_mean_prediction') + negative_to_zero=True, + name='delta_mean_prediction') update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) # prev_mean_prediction is E[x_A] in the update equation prev_mean_prediction = update_mean_prediction - delta_mean_prediction # batch_mean_label is E[y_B] in the update equation - batch_mean_label = _safe_div( - math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label') - delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count, - update_count, 'delta_mean_label') + batch_mean_label = math_ops.div_no_nan( + math_ops.reduce_sum(weighted_labels), batch_count, + negative_to_zero=True, + name='batch_mean_label') + delta_mean_label = math_ops.div_no_nan( + (batch_mean_label - mean_label) * batch_count, update_count, + negative_to_zero=True, + name='delta_mean_label') update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation prev_mean_label = update_mean_label - delta_mean_label @@ -3882,8 +3870,10 @@ def cohen_kappa(labels, po_sum = math_ops.reduce_sum(po) total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( - metrics_impl._safe_div( # pylint: disable=protected-access - pe_row * pe_col, total, None)) + 
math_ops.div_no_nan( + pe_row * pe_col, total, + negative_to_zero=True, + name=None)) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), math_ops.to_double(total)) diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 24d586479a..68f5a6e58a 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -108,13 +108,6 @@ class Rate(object): def variables(self): return self._vars - def _safe_div(self, numerator, denominator, name): - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero, name=name) - def _add_variable(self, name, shape=None, dtype=None): """Private method for adding variables to the graph.""" if self._built: @@ -148,4 +141,6 @@ class Rate(object): state_ops.assign(self.prev_values, values) state_ops.assign(self.prev_denominator, denominator) - return self._safe_div(self.numer, self.denom, name="safe_rate") + return math_ops.div_no_nan(self.numer, self.denom, + negative_to_zero=True, + name="safe_rate") diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index f94697c913..12ea75c5ea 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -607,7 +607,8 @@ def weighted_masked_objective(fn): score_array = math_ops.multiply(score_array, weights) score_array = math_ops.reduce_sum(score_array) weights = math_ops.reduce_sum(weights) - score_array = metrics_module.safe_div(score_array, weights) + score_array = math_ops.div_no_nan(score_array, weights, + negative_to_zero=True) return K.mean(score_array) return weighted diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 0983d62c59..6f4353f96a 100644 --- a/tensorflow/python/keras/metrics.py 
+++ b/tensorflow/python/keras/metrics.py @@ -136,23 +136,6 @@ def result_wrapper(result_fn): return tf_decorator.make_decorator(result_fn, decorated) -def safe_div(numerator, denominator): - """Divides two tensors element-wise, returning 0 if the denominator is <= 0. - - Args: - numerator: A `Tensor`. - denominator: A `Tensor`, with dtype matching `numerator`. - - Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` - """ - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero) - - def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight): """Squeeze or expand last dimension if needed. @@ -472,7 +455,7 @@ class Mean(Metric): state_ops.assign_add(self.count, num_values) def result(self): - return safe_div(self.total, self.count) + return math_ops.div_no_nan(self.total, self.count, negative_to_zero=True) class MeanMetricWrapper(Mean): diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index 87fc715783..c45b5035de 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -34,25 +34,11 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses -from tensorflow.python.ops.losses import losses_impl from tensorflow.python.ops.losses import util from tensorflow.python.platform import test from tensorflow.python.training import momentum as momentum_lib -safe_div = losses_impl._safe_div # pylint: disable=protected-access - - -class SafeDivTest(test.TestCase): - - def testEager(self): - with context.eager_mode(): - self.assertAllEqual(safe_div(constant_op.constant(1.0), - constant_op.constant(0.0)), 0.0) - 
self.assertAllEqual(safe_div(constant_op.constant(1.0), - 0.0), 0.0) - - class AbsoluteDifferenceLossTest(test.TestCase): def setUp(self): diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 806539747e..1e65aac115 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -74,31 +74,6 @@ class Reduction(object): raise ValueError("Invalid ReductionKey %s." % key) -def _safe_div(numerator, denominator, name="value"): - """Computes a safe divide which returns 0 if the denominator is zero. - - Note that the function contains an additional conditional check that is - necessary for avoiding situations where the loss is zero causing NaNs to - creep into the gradient computation. - - Args: - numerator: An arbitrary `Tensor`. - denominator: `Tensor` whose shape matches `numerator` and whose values are - assumed to be non-negative. - name: An optional name for the returned op. - - Returns: - The element-wise value of the numerator divided by the denominator. - """ - return array_ops.where( - math_ops.greater(denominator, 0), - math_ops.div(numerator, array_ops.where( - math_ops.equal(denominator, 0), - array_ops.ones_like(denominator), denominator)), - array_ops.zeros_like(numerator), - name=name) - - def _safe_mean(losses, num_present): """Computes a safe mean of the losses. @@ -111,7 +86,8 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return _safe_div(total_loss, num_present) + return math_ops.div_no_nan(total_loss, num_present, + negative_to_zero=True, name="value") def _num_present(losses, weights, per_batch=False): @@ -599,14 +575,18 @@ def mean_pairwise_squared_error( keepdims=True) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, - num_present_per_batch - 1) + term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, + num_present_per_batch - 1, + negative_to_zero=True, + name="value") sum_diff = math_ops.reduce_sum( diffs, reduction_indices=reduction_indices, keepdims=True) - term2 = 2.0 * _safe_div( + term2 = 2.0 * math_ops.div_no_nan( math_ops.square(sum_diff), - math_ops.multiply(num_present_per_batch, num_present_per_batch - 1)) + math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), + negative_to_zero=True, + name="value") weighted_losses = math_ops.multiply(term1 - term2, weights) loss = math_ops.reduce_sum(weighted_losses) diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 763877c2d2..32f8fd3ed7 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -213,24 +213,6 @@ def _maybe_expand_labels(labels, predictions): lambda: array_ops.expand_dims(labels, -1, name=scope), lambda: labels) -def _safe_div(numerator, denominator, name): - """Divides two tensors element-wise, returning 0 if the denominator is <= 0. - - Args: - numerator: A real `Tensor`. - denominator: A real `Tensor`, with dtype matching `numerator`. - name: Name for the returned op. 
- - Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` - """ - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero, name=name) - - def _safe_scalar_div(numerator, denominator, name): """Divides two values, returning 0 if the denominator is 0. @@ -244,13 +226,7 @@ def _safe_scalar_div(numerator, denominator, name): """ numerator.get_shape().with_rank_at_most(1) denominator.get_shape().with_rank_at_most(1) - return control_flow_ops.cond( - math_ops.equal( - array_ops.constant(0.0, dtype=dtypes.float64), denominator), - lambda: array_ops.constant(0.0, dtype=dtypes.float64), - lambda: math_ops.div(numerator, denominator), - name=name) - + return math_ops.div_no_nan(numerator, denominator, name=name) def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None): """Calculate a streaming confusion matrix. 
@@ -402,11 +378,13 @@ def mean(values, with ops.control_dependencies([values]): update_count_op = state_ops.assign_add(count, num_values) - compute_mean = lambda _, t, c: _safe_div(t, c, 'value') + compute_mean = lambda _, t, c: math_ops.div_no_nan( + t, c, negative_to_zero=True, name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = _safe_div(update_total_op, update_count_op, 'update_op') + update_op = math_ops.div_no_nan(update_total_op, update_count_op, + negative_to_zero=True, name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -778,16 +756,21 @@ def auc(labels, """ dtp = tp[:num_thresholds - 1] - tp[1:] p = tp + fp - prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope') + prec_slope = math_ops.div_no_nan(dtp, p[:num_thresholds - 1] - p[1:], + negative_to_zero=True, + name='prec_slope') intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:]) safe_p_ratio = array_ops.where( math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0), - _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'), + math_ops.div_no_nan(p[:num_thresholds - 1], p[1:], + negative_to_zero=True, + name='recall_relative_ratio'), array_ops.ones_like(p[1:])) return math_ops.reduce_sum( - _safe_div( + math_ops.div_no_nan( prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)), tp[1:] + fn[1:], + negative_to_zero=True, name='pr_auc_increment'), name='interpolate_pr_auc') @@ -1068,7 +1051,8 @@ def mean_per_class_accuracy(labels, update_count_op = state_ops.scatter_add(count, labels, is_correct) def compute_mean_accuracy(_, count, total): - per_class_accuracy = _safe_div(count, total, None) + per_class_accuracy = math_ops.div_no_nan( + count, total, negative_to_zero=True, name=None) mean_accuracy_v = math_ops.reduce_mean( per_class_accuracy, name='mean_accuracy') return mean_accuracy_v @@ -1076,7 +1060,9 @@ def mean_per_class_accuracy(labels, 
mean_accuracy_v = _aggregate_across_towers( metrics_collections, compute_mean_accuracy, count, total) - update_op = _safe_div(update_count_op, update_total_op, name='update_op') + update_op = math_ops.div_no_nan(update_count_op, update_total_op, + negative_to_zero=True, + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -1385,12 +1371,15 @@ def mean_tensor(values, with ops.control_dependencies([values]): update_count_op = state_ops.assign_add(count, num_values) - compute_mean = lambda _, t, c: _safe_div(t, c, 'value') + compute_mean = lambda _, t, c: math_ops.div_no_nan( + t, c, negative_to_zero=True, name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = _safe_div(update_total_op, update_count_op, 'update_op') + update_op = math_ops.div_no_nan(update_total_op, update_count_op, + negative_to_zero=True, + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) -- GitLab From a6b016dc0a33f50f20fd1e8e3b9716ddbec75e57 Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Thu, 23 Aug 2018 11:14:25 +0900 Subject: [PATCH 0046/1357] comments regarding why rates are disabled --- tensorflow/core/kernels/extract_volume_patches_op.cc | 4 ++++ tensorflow/core/ops/array_ops.cc | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc index 80405c66dc..0f1d566c75 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.cc +++ b/tensorflow/core/kernels/extract_volume_patches_op.cc @@ -87,6 +87,10 @@ class ExtractVolumePatchesOp : public UnaryOp { const int stride_cols = strides_[3]; /* + // TODO(hsgkim): enable rates + // Rates are disabled as of now due to Eigen's definitions of extract_volume_patch + // functions; none of them accept rates as its argument and rates are fixed to + // (1, 1, 1, 1, 1). 
A workaround has to be found for this. // In order to enable rates, uncomment the following lines and use // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize // calls. diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 48d8327a9e..6c8369200a 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2583,6 +2583,9 @@ REGISTER_OP("ExtractVolumePatches") } /* + // TODO(hsgkim): Enable rates. + // See extract_volume_patches_op.cc for why rates are disabled now. + std::vector rates; TF_RETURN_IF_ERROR(c->GetAttr("rates", &rates)); if (rates.size() != 5) { -- GitLab From 52d3e5a3a7bece06da072dcfb3f4ac53e83f8470 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Wed, 22 Aug 2018 23:34:34 -0700 Subject: [PATCH 0047/1357] Added the BUILD files for tbb and updated the ngraph.BUILD with CPU library (DEX). --- WORKSPACE | 7 ++ tensorflow/workspace.bzl | 29 +++++--- third_party/ngraph/ngraph.BUILD | 109 ++++++++++++++++++++++++++++- third_party/ngraph/ngraph_tf.BUILD | 11 +-- third_party/ngraph/tbb.BUILD | 52 ++++++++++++++ 5 files changed, 188 insertions(+), 20 deletions(-) create mode 100644 third_party/ngraph/tbb.BUILD diff --git a/WORKSPACE b/WORKSPACE index 17961829a6..4af1a1e75f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -79,3 +79,10 @@ new_http_archive( "http://download.tensorflow.org/models/speech_commands_v0.01.zip", ], ) + +new_local_repository( + name = "ngraph", + path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph", + build_file = "//third_party/ngraph:ngraph.BUILD", +) + diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5d90d0fe64..951cb8a89d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -833,15 +833,26 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): ) tf_http_archive( - name = "ngraph", - urls = [ - "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - 
"https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - ], - sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", - strip_prefix = "ngraph-0.5.0", - build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), - ) + name = "tbb", + urls = [ + "https://mirror.bazel.build/github.com/01org/tbb/archive/tbb_2018.zip", + "https://github.com/01org/tbb/archive/tbb_2018.zip", + ], + sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13", + strip_prefix = "tbb-tbb_2018", + build_file = clean_dep("//third_party/ngraph:tbb.BUILD"), + ) + + # tf_http_archive( + # name = "ngraph", + # urls = [ + # "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", + # "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", + # ], + # sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", + # strip_prefix = "ngraph-0.5.0", + # build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), + # ) tf_http_archive( name = "nlohmann_json_lib", diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD index 31aa3cee51..f1cf8acbf6 100644 --- a/third_party/ngraph/ngraph.BUILD +++ b/third_party/ngraph/ngraph.BUILD @@ -2,6 +2,112 @@ licenses(["notice"]) # 3-Clause BSD exports_files(["LICENSE"]) +cc_library( + name = "ngraph_headers", + hdrs = glob(["src/ngraph/**/*.hpp"]) , + visibility = ["//visibility:public"], +) + +cc_library( + name = "ngraph_cpu_backend", + srcs = [ + "src/ngraph/runtime/cpu/cpu_backend.cpp", + "src/ngraph/runtime/cpu/cpu_builder.cpp", + "src/ngraph/runtime/cpu/cpu_call_frame.cpp", + "src/ngraph/runtime/cpu/cpu_external_function.cpp", + "src/ngraph/runtime/cpu/cpu_kernels.cpp", + "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp", + "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp", + "src/ngraph/runtime/cpu/cpu_tensor_view.cpp", + "src/ngraph/runtime/cpu/cpu_tracing.cpp", + "src/ngraph/runtime/cpu/builder/add.cpp", + 
"src/ngraph/runtime/cpu/builder/allreduce.cpp", + "src/ngraph/runtime/cpu/builder/avg_pool.cpp", + "src/ngraph/runtime/cpu/builder/argmin.cpp", + "src/ngraph/runtime/cpu/builder/argmax.cpp", + "src/ngraph/runtime/cpu/builder/batch_norm.cpp", + "src/ngraph/runtime/cpu/builder/broadcast.cpp", + "src/ngraph/runtime/cpu/builder/bounded_relu.cpp", + "src/ngraph/runtime/cpu/builder/concat.cpp", + "src/ngraph/runtime/cpu/builder/convert.cpp", + "src/ngraph/runtime/cpu/builder/convert_layout.cpp", + "src/ngraph/runtime/cpu/builder/convolution.cpp", + "src/ngraph/runtime/cpu/builder/dot.cpp", + "src/ngraph/runtime/cpu/builder/function_call.cpp", + "src/ngraph/runtime/cpu/builder/lstm.cpp", + "src/ngraph/runtime/cpu/builder/lrn.cpp", + "src/ngraph/runtime/cpu/builder/matmul_bias.cpp", + "src/ngraph/runtime/cpu/builder/max.cpp", + "src/ngraph/runtime/cpu/builder/max_pool.cpp", + "src/ngraph/runtime/cpu/builder/min.cpp", + "src/ngraph/runtime/cpu/builder/one_hot.cpp", + "src/ngraph/runtime/cpu/builder/relu.cpp", + "src/ngraph/runtime/cpu/builder/pad.cpp", + "src/ngraph/runtime/cpu/builder/product.cpp", + "src/ngraph/runtime/cpu/builder/reduce_function.cpp", + "src/ngraph/runtime/cpu/builder/reduce_function_window.cpp", + "src/ngraph/runtime/cpu/builder/replace_slice.cpp", + "src/ngraph/runtime/cpu/builder/reshape.cpp", + "src/ngraph/runtime/cpu/builder/reverse.cpp", + "src/ngraph/runtime/cpu/builder/reverse_sequence.cpp", + "src/ngraph/runtime/cpu/builder/rnn.cpp", + "src/ngraph/runtime/cpu/builder/select.cpp", + "src/ngraph/runtime/cpu/builder/select_and_scatter.cpp", + "src/ngraph/runtime/cpu/builder/sigmoid.cpp", + "src/ngraph/runtime/cpu/builder/slice.cpp", + "src/ngraph/runtime/cpu/builder/softmax.cpp", + "src/ngraph/runtime/cpu/builder/sum.cpp", + "src/ngraph/runtime/cpu/kernel/eigen_thread_pool.cpp", + "src/ngraph/runtime/cpu/kernel/pad.cpp", + "src/ngraph/runtime/cpu/kernel/reduce_max.cpp", + "src/ngraph/runtime/cpu/kernel/reduce_sum.cpp", + 
"src/ngraph/runtime/cpu/kernel/reshape.cpp", + "src/ngraph/runtime/cpu/mkldnn_emitter.cpp", + "src/ngraph/runtime/cpu/mkldnn_invoke.cpp", + "src/ngraph/runtime/cpu/mkldnn_utils.cpp", + "src/ngraph/runtime/cpu/op/batch_dot.cpp", + "src/ngraph/runtime/cpu/op/batch_norm_relu.cpp", + "src/ngraph/runtime/cpu/op/bounded_relu.cpp", + "src/ngraph/runtime/cpu/op/group_conv.cpp", + "src/ngraph/runtime/cpu/op/conv_bias.cpp", + "src/ngraph/runtime/cpu/op/conv_relu.cpp", + "src/ngraph/runtime/cpu/op/convert_layout.cpp", + "src/ngraph/runtime/cpu/op/loop_kernel.cpp", + "src/ngraph/runtime/cpu/op/lstm.cpp", + "src/ngraph/runtime/cpu/op/matmul_bias.cpp", + "src/ngraph/runtime/cpu/op/max_pool_with_indices.cpp", + "src/ngraph/runtime/cpu/op/rnn.cpp", + "src/ngraph/runtime/cpu/op/sigmoid_mul.cpp", + "src/ngraph/runtime/cpu/pass/cpu_assignment.cpp", + "src/ngraph/runtime/cpu/pass/cpu_collapse_dims.cpp", + "src/ngraph/runtime/cpu/pass/cpu_concat_inputs.cpp", + "src/ngraph/runtime/cpu/pass/cpu_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_layout.cpp", + "src/ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_mat_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.cpp", + "src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_workspace_insertion.cpp", + ], + hdrs = glob(["src/ngraph/runtime/cpu/**/*.hpp"]) + glob([]), + deps = [ + ":ngraph_headers", + "@eigen_archive//:eigen", + "@nlohmann_json_lib", + "@tbb", + "@mkl_dnn//:mkl_dnn", + ], + copts = [ + "-I external/ngraph/src", + "-I external/nlohmann_json_lib/include/", + '-D SHARED_LIB_EXT=\\".so\\"', + '-D NGRAPH_VERSION=\\"0.5.0\\"', + '-D NGRAPH_DEX_ONLY', + ], + visibility = ["//visibility:public"], + alwayslink = 1, +) + cc_library( name = "ngraph_core", srcs = glob([ @@ -21,8 +127,9 @@ cc_library( "src/ngraph/runtime/interpreter/*.cpp", "src/ngraph/runtime/interpreter/*.hpp", ]), - hdrs = glob(["src/ngraph/**/*.hpp"]), deps = [ + 
":ngraph_headers", + ":ngraph_cpu_backend", "@eigen_archive//:eigen", "@nlohmann_json_lib", ], diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index 4d96ccf2f2..0647d9926a 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -7,15 +7,6 @@ load( "tf_cc_test", ) -cc_library( - name = "ngraph_libs_linux", - srcs = [ - "lib/libiomp5.so", - "lib/libmklml_intel.so", - ], - visibility = ["//visibility:public"], -) - cc_library( name = "ngraph_tf", srcs = [ @@ -58,7 +49,7 @@ cc_library( "-I external/ngraph_tf/src", "-I external/ngraph_tf/logging", "-I external/ngraph/src", - "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", + #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", ], alwayslink = 1, visibility = ["//visibility:public"], diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD new file mode 100644 index 0000000000..c3e7f7fd35 --- /dev/null +++ b/third_party/ngraph/tbb.BUILD @@ -0,0 +1,52 @@ +licenses(["notice"]) # 3-Clause BSD + +exports_files(["LICENSE"]) + +genrule( + name = "build_tbb", + srcs = glob(["**"]) + [ + "@local_config_cc//:toolchain", + ], + cmd = """ + set -e + WORK_DIR=$$PWD + DEST_DIR=$$PWD/$(@D) + export PATH=$$(dirname $(AR)):$$PATH + export CXXFLAGS=$(CC_FLAGS) + export NM=$(NM) + export AR=$(AR) + cd $$(dirname $(location :Makefile)) + + #TBB's build needs some help to figure out what compiler it's using + if $$CXX --version | grep clang &> /dev/null; then + COMPILER_OPT="compiler=clang" + else + COMPILER_OPT="compiler=gcc" + fi + + # uses extra_inc=big_iron.inc to specify that static libraries are + # built. 
See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792 + make tbb_build_prefix="build" \ + extra_inc=big_iron.inc \ + $$COMPILER_OPT; \ + + echo cp build/build_{release,debug}/*.a $$DEST_DIR + cp build/build_{release,debug}/*.a $$DEST_DIR + cd $$WORK_DIR + """, + outs = [ + "libtbb.a", + "libtbbmalloc.a", + ] +) + +cc_library( + name = "tbb", + hdrs = glob([ + "include/serial/**", + "include/tbb/**/**", + ]), + srcs = ["libtbb.a"], + includes = ["include"], + visibility = ["//visibility:public"], +) \ No newline at end of file -- GitLab From 38f811077dd52820eaa3d5c684f41142de01c7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 23 Aug 2018 16:23:03 +0800 Subject: [PATCH 0048/1357] CLN: remove negative_to_zero argument --- .../contrib/losses/python/losses/loss_ops.py | 9 +++-- .../contrib/metrics/python/ops/metric_ops.py | 20 +++++------ tensorflow/contrib/rate/rate.py | 4 +-- .../python/keras/engine/training_utils.py | 4 +-- tensorflow/python/keras/metrics.py | 2 +- tensorflow/python/ops/losses/losses_impl.py | 18 +++++----- tensorflow/python/ops/math_ops.py | 5 +-- tensorflow/python/ops/math_ops_test.py | 13 -------- tensorflow/python/ops/metrics_impl.py | 33 ++++++++++--------- 9 files changed, 47 insertions(+), 61 deletions(-) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 29f7953c3b..8a0932c376 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -78,8 +78,9 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return math_ops.div_no_nan(total_loss, num_present, - negative_to_zero=True, name="value") + return math_ops.div_no_nan(total_loss, + math_ops.maximum(num_present, 0), + name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -585,14 +586,12 @@ def mean_pairwise_squared_error(predictions, num_present_per_batch = _num_present(diffs, weights, per_batch=True) term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, - num_present_per_batch, - negative_to_zero=True, + math_ops.maximum(num_present_per_batch), name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff), math_ops.square(num_present_per_batch), - negative_to_zero=True, name="value") loss = _scale_losses(term1 - term2, weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index d972e7da53..bfef0816aa 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -3188,12 +3188,12 @@ def streaming_covariance(predictions, # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = math_ops.div_no_nan( - math_ops.reduce_sum(weighted_predictions), batch_count, - negative_to_zero=True, + math_ops.reduce_sum(weighted_predictions), + math_ops.maximum(batch_count, 0), name='batch_mean_prediction') delta_mean_prediction = math_ops.div_no_nan( - (batch_mean_prediction - mean_prediction) * batch_count, update_count, - negative_to_zero=True, + (batch_mean_prediction - mean_prediction) * batch_count, + math_ops.maximum(update_count, 0), name='delta_mean_prediction') update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) @@ -3202,12 +3202,12 @@ def streaming_covariance(predictions, # 
batch_mean_label is E[y_B] in the update equation batch_mean_label = math_ops.div_no_nan( - math_ops.reduce_sum(weighted_labels), batch_count, - negative_to_zero=True, + math_ops.reduce_sum(weighted_labels), + math_ops.maximum(batch_count, 0), name='batch_mean_label') delta_mean_label = math_ops.div_no_nan( - (batch_mean_label - mean_label) * batch_count, update_count, - negative_to_zero=True, + (batch_mean_label - mean_label) * batch_count, + math_ops.maximum(update_count, 0), name='delta_mean_label') update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation @@ -3871,8 +3871,8 @@ def cohen_kappa(labels, total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( math_ops.div_no_nan( - pe_row * pe_col, total, - negative_to_zero=True, + pe_row * pe_col, + math_ops.maximum(total, 0), name=None)) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 68f5a6e58a..489d5cce78 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -141,6 +141,6 @@ class Rate(object): state_ops.assign(self.prev_values, values) state_ops.assign(self.prev_denominator, denominator) - return math_ops.div_no_nan(self.numer, self.denom, - negative_to_zero=True, + return math_ops.div_no_nan(self.numer, + math_op.maximum(self.denom, 0), name="safe_rate") diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 12ea75c5ea..eeca60dc57 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -607,8 +607,8 @@ def weighted_masked_objective(fn): score_array = math_ops.multiply(score_array, weights) score_array = math_ops.reduce_sum(score_array) weights = math_ops.reduce_sum(weights) - score_array = math_ops.div_no_nan(score_array, weights, - negative_to_zero=True) + 
score_array = math_ops.div_no_nan(score_array, + math_ops.maximum(weights, 0)) return K.mean(score_array) return weighted diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 6f4353f96a..b5d3138da2 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -455,7 +455,7 @@ class Mean(Metric): state_ops.assign_add(self.count, num_values) def result(self): - return math_ops.div_no_nan(self.total, self.count, negative_to_zero=True) + return math_ops.div_no_nan(self.total, math_ops.maximum(self.count, 0)) class MeanMetricWrapper(Mean): diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 1e65aac115..a980a43f62 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -86,8 +86,9 @@ def _safe_mean(losses, num_present): then zero is returned. """ total_loss = math_ops.reduce_sum(losses) - return math_ops.div_no_nan(total_loss, num_present, - negative_to_zero=True, name="value") + return math_ops.div_no_nan(total_loss, + math_ops.maximum(num_present, 0), + name="value") def _num_present(losses, weights, per_batch=False): @@ -575,17 +576,18 @@ def mean_pairwise_squared_error( keepdims=True) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, - num_present_per_batch - 1, - negative_to_zero=True, - name="value") + term1 = 2.0 * math_ops.div_no_nan( + sum_squares_diff_per_batch, + math_ops.maximum(num_present_per_batch - 1, 0), + name="value") sum_diff = math_ops.reduce_sum( diffs, reduction_indices=reduction_indices, keepdims=True) term2 = 2.0 * math_ops.div_no_nan( math_ops.square(sum_diff), - math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), - negative_to_zero=True, + math_ops.maximum( + math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), + 0), name="value") weighted_losses = 
math_ops.multiply(term1 - term2, weights) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index a693b1ebac..67ea534639 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1039,14 +1039,13 @@ def div(x, y, name=None): @tf_export("div_no_nan") -def div_no_nan(x, y, name=None, negative_to_zero=False): +def div_no_nan(x, y, name=None): """Computes an unsafe divide which returns 0 if the y is zero. Args: x: A `Tensor`. Must be one of the following types: `float32`, `float64`. y: A `Tensor` whose dtype is compatible with `x`. name: A name for the operation (optional). - negative_to_zero: If `True`, negative is treated as zero in denominator. Returns: The element-wise value of the x divided by y. """ @@ -1059,8 +1058,6 @@ def div_no_nan(x, y, name=None, negative_to_zero=False): if x_dtype != y_dtype: raise TypeError("x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype)) - if negative_to_zero: - y = gen_math_ops.maximum(y, 0, name='negative_to_zero') return gen_math_ops.div_no_nan(x, y, name=name) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 6e1e5f37c8..6bd41020c5 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -487,19 +487,6 @@ class DivNoNanTest(test_util.TensorFlowTestCase): tf_result = math_ops.div_no_nan(nums, divs).eval() self.assertAllEqual(tf_result, np_result) - def testNegativeToZero(self): - for dtype in [np.float32, np.float64]: - nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1) - divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24) - - np_result = np.true_divide(nums, divs) - np_result[:, divs[0] <= 0] = 0 - - with self.cached_session(): - tf_result = math_ops.div_no_nan(nums, divs, - negative_to_zero=True).eval() - self.assertAllEqual(tf_result, np_result) - if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/metrics_impl.py 
b/tensorflow/python/ops/metrics_impl.py index 32f8fd3ed7..e449318020 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -379,12 +379,13 @@ def mean(values, update_count_op = state_ops.assign_add(count, num_values) compute_mean = lambda _, t, c: math_ops.div_no_nan( - t, c, negative_to_zero=True, name='value') + t, math_ops.maximum(c, 0), name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = math_ops.div_no_nan(update_total_op, update_count_op, - negative_to_zero=True, name='update_op') + update_op = math_ops.div_no_nan(update_total_op, + math_ops.maximum(update_count_op, 0), + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -756,21 +757,21 @@ def auc(labels, """ dtp = tp[:num_thresholds - 1] - tp[1:] p = tp + fp - prec_slope = math_ops.div_no_nan(dtp, p[:num_thresholds - 1] - p[1:], - negative_to_zero=True, - name='prec_slope') + prec_slope = math_ops.div_no_nan( + dtp, + math_ops.maximum(p[:num_thresholds - 1] - p[1:], 0), + name='prec_slope') intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:]) safe_p_ratio = array_ops.where( math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0), - math_ops.div_no_nan(p[:num_thresholds - 1], p[1:], - negative_to_zero=True, + math_ops.div_no_nan(p[:num_thresholds - 1], + math_ops.maximum(p[1:], 0), name='recall_relative_ratio'), array_ops.ones_like(p[1:])) return math_ops.reduce_sum( math_ops.div_no_nan( prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)), - tp[1:] + fn[1:], - negative_to_zero=True, + math_ops.maximum(tp[1:] + fn[1:], 0), name='pr_auc_increment'), name='interpolate_pr_auc') @@ -1052,7 +1053,7 @@ def mean_per_class_accuracy(labels, def compute_mean_accuracy(_, count, total): per_class_accuracy = math_ops.div_no_nan( - count, total, negative_to_zero=True, name=None) + count, math_ops.maximum(total, 0), name=None) mean_accuracy_v = 
math_ops.reduce_mean( per_class_accuracy, name='mean_accuracy') return mean_accuracy_v @@ -1060,8 +1061,8 @@ def mean_per_class_accuracy(labels, mean_accuracy_v = _aggregate_across_towers( metrics_collections, compute_mean_accuracy, count, total) - update_op = math_ops.div_no_nan(update_count_op, update_total_op, - negative_to_zero=True, + update_op = math_ops.div_no_nan(update_count_op, + math_ops.maximum(update_total_op, 0), name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -1372,13 +1373,13 @@ def mean_tensor(values, update_count_op = state_ops.assign_add(count, num_values) compute_mean = lambda _, t, c: math_ops.div_no_nan( - t, c, negative_to_zero=True, name='value') + t, math_ops.maximum(c, 0), name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = math_ops.div_no_nan(update_total_op, update_count_op, - negative_to_zero=True, + update_op = math_ops.div_no_nan(update_total_op, + math_ops.maximum(update_count_op, 0), name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) -- GitLab From 407a64b773f15bfe67a2b5b1979134368464b6ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 23 Aug 2018 16:52:00 +0800 Subject: [PATCH 0049/1357] TST: revise test case and too long line --- .../python/estimator/canned/boosted_trees.py | 7 +- .../estimator/canned/boosted_trees_test.py | 125 +++++++++++++----- 2 files changed, 96 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index b1d5d60fb0..f2a5b9178b 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -213,8 +213,13 @@ def _generate_feature_name_mapping(sorted_feature_columns): feature_column_lib._VocabularyListCategoricalColumn): # 
pylint:disable=protected-access for value in categorical_column.vocabulary_list: names.append('{}:{}'.format(column.name, value)) + elif isinstance(categorical_column, + feature_column_lib._BucketizedColumn): # pylint:disable=protected-access + boundaries = [-np.inf] + list(categorical_column.boundaries) + [np.inf] + for pair in zip(boundaries[:-1], boundaries[1:]): + names.append('{}:{}'.format(column.name, pair)) else: - for num in categorical_column._num_buckets: # pylint:disable=protected-access + for num in range(categorical_column._num_buckets): # pylint:disable=protected-access names.append('{}:{}'.format(column.name, num)) else: names.append(column.name) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 24d3a3501e..7620f73425 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -564,13 +564,17 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): # Train for a few steps, and validate final checkpoint. 
est.train(input_fn, steps=num_steps) - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names_expected = ['f_0_bucketized', + 'f_2_bucketized', + 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.833933, 0.606342, 0.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.579010, 0.420990, 0.0], importances) @@ -599,7 +603,9 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(ValueError, 'empty serialized string'): est.experimental_feature_importances(normalize=True) - def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text): + def _create_fake_checkpoint_with_tree_ensemble_proto(self, + est, + tree_ensemble_text): with ops.Graph().as_default(): with ops.name_scope('boosted_trees') as name: tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) @@ -731,14 +737,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): tree_weights: 1.0 tree_weights: 1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', + 'f_2_bucketized', + 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) 
+ # Gain sum for each features: + # = 1.0 * [3 + 1, 2, 2] + 1.0 * [1, 1, 0] self.assertAllClose([5.0, 3.0, 2.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) @@ -820,14 +833,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): tree_weights: 0.6 tree_weights: 1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', + 'f_2_bucketized', + 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) + # Gain sum for each features: + # = 0.4 * [12.5, 0, 5] + 0.6 * [0, 5, 0] + 1.0 * [0, 0, 0] self.assertAllClose([5.0, 3.0, 2.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) @@ -856,11 +876,15 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): tree_weights: 1.0 tree_weights: 1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) # Reverse order because feature importances are sorted by np.argsort(f)[::-1] - feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] - feature_names, 
importances = est.experimental_feature_importances(normalize=False) + feature_names_expected = ['f_2_bucketized', + 'f_1_bucketized', + 'f_0_bucketized'] + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.0, 0.0, 0.0], importances) @@ -868,17 +892,20 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'all empty or contain only a root node'): est.experimental_feature_importances(normalize=True) - def TestFeatureImportancesNamesForCategoricalColumn(self): + def testFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) feature_indicator = feature_column.indicator_column(categorical) bucketized_col = feature_column.bucketized_column( feature_column.numeric_column( - 'an_uninformative_feature', dtype=dtypes.float32), + 'continuous', dtype=dtypes.float32), BUCKET_BOUNDARIES) + bucketized_indicator = feature_column.indicator_column(bucketized_col) est = boosted_trees.BoostedTreesRegressor( - feature_columns=[bucketized_col, feature_indicator], + feature_columns=[feature_indicator, + bucketized_col, + bucketized_indicator], n_batches_per_layer=1, n_trees=2, learning_rate=1.0, @@ -898,7 +925,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): } nodes { bucketized_split { - feature_id: 3 + feature_id: 4 left_id: 3 right_id: 4 } @@ -930,36 +957,63 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): right_id: 2 } metadata { - gain: 3.0 + gain: 1.0 + } + } + nodes { + bucketized_split { + feature_id: 5 + left_id: 3 + right_id: 4 + } + metadata { + gain: 2.0 } } nodes { leaf { - scalar: -0.34 + scalar: -2.34 } } nodes { leaf { - scalar: 1.34 + scalar: 3.34 + } + } + nodes { + leaf { + scalar: 4.34 } } } tree_weights: 1.0 tree_weights: 1.0 """ - 
self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['categorical_indicator:good', - 'an_uninformative_feature_bucketized', - 'categorical_indicator:ok', - 'categorical_indicator:bad'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + feature_names_expected = ['categorical_indicator:ok', + 'continuous_bucketized_indicator:(-2.0, 0.5)', + 'continuous_bucketized_indicator:(-inf, -2.0)', + 'categorical_indicator:bad', + # Reverse order because feature importances + # are sorted by np.argsort(f)[::-1] + 'continuous_bucketized_indicator:(12.0, inf)', + 'continuous_bucketized_indicator:(0.5, 12.0)', + 'continuous_bucketized', + 'categorical_indicator:good'] + + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([5.0, 3.0, 2.0, 0.0], importances) + # Gain sum for each features: + # = 1.0 * [5, 0, 2, 0, 0, 0, 0, 0] + 1.0 * [0, 2, 0, 1, 0, 0, 0, 0] + self.assertAllClose([5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances) + self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances) def testNegativeFeatureImportances(self): est = boosted_trees.BoostedTreesClassifier( @@ -995,7 +1049,8 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): } tree_weights: -1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) with self.assertRaisesRegexp(AssertionError, 
'non-negative'): est.experimental_feature_importances(normalize=False) -- GitLab From cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Fri, 24 Aug 2018 11:51:34 +0800 Subject: [PATCH 0050/1357] Refine LeakyRelu codes and update APIs. --- .../api_def/base_api/api_def_LeakyRelu.pbtxt | 4 ++++ .../base_api/api_def_LeakyReluGrad.pbtxt | 24 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 2 +- tensorflow/python/eager/pywrap_tfe_src.cc | 2 +- 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt new file mode 100644 index 0000000000..4a61889f54 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LeakyRelu" + summary: "Computes rectified linear: `max(features, features * alpha)`." +} diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt new file mode 100644 index 0000000000..e427526602 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt @@ -0,0 +1,24 @@ +op { + graph_op_name: "LeakyReluGrad" + visibility: HIDDEN + in_arg { + name: "gradients" + description: < 0) + alpha * gradients * (features <= 0)`. +END + } + summary: "Computes rectified linear gradients for a LeakyRelu operation." 
+} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 837e91bc23..7693c2d485 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13637,7 +13637,7 @@ op { } } op { - name: "LeakykReluGrad" + name: "LeakyReluGrad" input_arg { name: "gradients" type_attr: "T" diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9b3b5fd7aa..18fafd0de1 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { "SoftplusGrad", "Softsign", "ReluGrad", + "LeakyRelu", "LeakyReluGrad", "Conv2D", "DepthwiseConv2dNative", @@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) { "BiasAdd", "Relu", "Relu6", - "LeakyRelu", "Elu", "Selu", "SparseSoftmaxCrossEntropyWithLogits", -- GitLab From aa02f7f3622dca8c7b03e745cdb8a10797f32f61 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Thu, 23 Aug 2018 22:56:22 -0700 Subject: [PATCH 0051/1357] Updated build files --- third_party/ngraph/ngraph.BUILD | 8 ++-- third_party/ngraph/ngraph_tf.BUILD | 2 +- third_party/ngraph/tbb.BUILD | 72 +++++++++++++++++------------- 3 files changed, 45 insertions(+), 37 deletions(-) diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD index f1cf8acbf6..3d9c3ac044 100644 --- a/third_party/ngraph/ngraph.BUILD +++ b/third_party/ngraph/ngraph.BUILD @@ -4,7 +4,7 @@ exports_files(["LICENSE"]) cc_library( name = "ngraph_headers", - hdrs = glob(["src/ngraph/**/*.hpp"]) , + hdrs = glob(["src/ngraph/**/*.hpp"]), visibility = ["//visibility:public"], ) @@ -102,7 +102,7 @@ cc_library( "-I external/nlohmann_json_lib/include/", '-D SHARED_LIB_EXT=\\".so\\"', '-D NGRAPH_VERSION=\\"0.5.0\\"', - '-D NGRAPH_DEX_ONLY', + "-D NGRAPH_DEX_ONLY", ], visibility = ["//visibility:public"], alwayslink = 1, @@ -124,8 +124,8 @@ cc_library( "src/ngraph/pass/*.hpp", 
"src/ngraph/runtime/*.cpp", "src/ngraph/type/*.cpp", - "src/ngraph/runtime/interpreter/*.cpp", - "src/ngraph/runtime/interpreter/*.hpp", + #"src/ngraph/runtime/interpreter/*.cpp", + #"src/ngraph/runtime/interpreter/*.hpp", ]), deps = [ ":ngraph_headers", diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index 0647d9926a..d0231e468e 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -71,7 +71,7 @@ tf_cc_test( ], extra_copts = [ "-fexceptions ", - "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", + #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", "-I external/ngraph_tf/src", "-I external/ngraph_tf/logging", "-I external/ngraph/src", diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD index c3e7f7fd35..7c760cb3b3 100644 --- a/third_party/ngraph/tbb.BUILD +++ b/third_party/ngraph/tbb.BUILD @@ -2,42 +2,50 @@ licenses(["notice"]) # 3-Clause BSD exports_files(["LICENSE"]) +# Taken from: https://github.com/rnburn/satyr/blob/master/bazel/tbb.BUILD +# License: MIT +# See: https://github.com/rnburn/satyr/blob/master/LICENSE + genrule( - name = "build_tbb", - srcs = glob(["**"]) + [ - "@local_config_cc//:toolchain", - ], - cmd = """ - set -e - WORK_DIR=$$PWD - DEST_DIR=$$PWD/$(@D) - export PATH=$$(dirname $(AR)):$$PATH - export CXXFLAGS=$(CC_FLAGS) - export NM=$(NM) - export AR=$(AR) - cd $$(dirname $(location :Makefile)) - - #TBB's build needs some help to figure out what compiler it's using - if $$CXX --version | grep clang &> /dev/null; then + name = "build_tbb", + srcs = glob(["**"]) + [ + "@local_config_cc//:toolchain", + ], + cmd = """ + set -e + WORK_DIR=$$PWD + DEST_DIR=$$PWD/$(@D) + export PATH=$$(dirname $(AR)):$$PATH + export CXXFLAGS=$(CC_FLAGS) + export NM=$(NM) + export AR=$(AR) + cd $$(dirname $(location :Makefile)) + + #TBB's build needs some help to figure out what compiler it's using + if $$CXX --version | grep clang &> /dev/null; then COMPILER_OPT="compiler=clang" - else - 
COMPILER_OPT="compiler=gcc" - fi + else + COMPILER_OPT="compiler=gcc" - # uses extra_inc=big_iron.inc to specify that static libraries are - # built. See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792 - make tbb_build_prefix="build" \ + # # Workaround for TBB bug + # # See https://github.com/01org/tbb/issues/59 + # CXXFLAGS="$$CXXFLAGS -flifetime-dse=1" + fi + + # uses extra_inc=big_iron.inc to specify that static libraries are + # built. See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792 + make tbb_build_prefix="build" \ extra_inc=big_iron.inc \ $$COMPILER_OPT; \ - echo cp build/build_{release,debug}/*.a $$DEST_DIR - cp build/build_{release,debug}/*.a $$DEST_DIR - cd $$WORK_DIR - """, - outs = [ - "libtbb.a", - "libtbbmalloc.a", - ] + echo cp build/build_{release,debug}/*.a $$DEST_DIR + cp build/build_{release,debug}/*.a $$DEST_DIR + cd $$WORK_DIR + """, + outs = [ + "libtbb.a", + "libtbbmalloc.a", + ], ) cc_library( @@ -45,8 +53,8 @@ cc_library( hdrs = glob([ "include/serial/**", "include/tbb/**/**", - ]), + ]), srcs = ["libtbb.a"], includes = ["include"], visibility = ["//visibility:public"], -) \ No newline at end of file +) -- GitLab From c7c152981cdf9494dce9efdeed04a9c3ae7a8e3d Mon Sep 17 00:00:00 2001 From: weidankong Date: Fri, 24 Aug 2018 11:23:26 -0700 Subject: [PATCH 0052/1357] Accumulated Gradient Normalization Optimizer --- tensorflow/contrib/opt/BUILD | 19 ++ tensorflow/contrib/opt/__init__.py | 3 + .../opt/python/training/agn_optimizer.py | 309 ++++++++++++++++++ .../opt/python/training/agn_optimizer_test.py | 279 ++++++++++++++++ 4 files changed, 610 insertions(+) create mode 100644 tensorflow/contrib/opt/python/training/agn_optimizer.py create mode 100644 tensorflow/contrib/opt/python/training/agn_optimizer_test.py diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 5319a8b655..642cda7845 100644 --- a/tensorflow/contrib/opt/BUILD +++ 
b/tensorflow/contrib/opt/BUILD @@ -16,6 +16,7 @@ py_library( "__init__.py", "python/training/adamax.py", "python/training/addsign.py", + "python/training/agn_optimizer.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/elastic_average_optimizer.py", "python/training/external_optimizer.py", @@ -242,6 +243,24 @@ tf_py_test( ], ) +tf_py_test( + name = "agn_optimizer_test", + srcs = ["python/training/agn_optimizer_test.py"], + additional_deps = [ + ":opt_py", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:array_ops", + "//tensorflow/python:variables", + "//tensorflow/python:framework", + "//tensorflow/python:platform", + "//tensorflow/python:training", + "//tensorflow/python:ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//third_party/py/numpy", + ], +) + tf_py_test( name = "elastic_average_optimizer_test", srcs = ["python/training/elastic_average_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 781621dba0..b814a57680 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -21,6 +21,7 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * +from tensorflow.contrib.opt.python.training.agn_optimizer import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.elastic_average_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * @@ -59,6 +60,8 @@ _allowed_symbols = [ 'VariableClippingOptimizer', 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', + 'AGNOptimizer', + 'AGNCustomGetter', 'ElasticAverageOptimizer', 'ElasticAverageCustomGetter', 'ModelAverageOptimizer', diff --git 
a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py new file mode 100644 index 0000000000..dc1f8d6347 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -0,0 +1,309 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.training import optimizer +from tensorflow.python.training import session_run_hook +from tensorflow.python.training import gradient_descent + + +GLOBAL_VARIABLE_NAME = 'global_center_variable' +GRAD_VARIABLE_NAME = 'grad_variable' + +class AGNCustomGetter(object): + """Custom_getter class 
is used to do: + 1. Change trainable variables to local collection and place them at worker + device + 2. Generate global variables(global center variables) + 3. Generate grad variables(gradients) which record the gradients sum + and place them at worker device + Notice that the class should be used with tf.replica_device_setter, + so that the global center variables and global step variable can be placed + at ps device. + """ + def __init__(self, worker_device): + """ + Args: + worker_device: put the grad_variables on worker device + """ + self._worker_device = worker_device + self._global_map = {} + self._grad_map = {} + + def __call__(self, getter, name, trainable, collections, *args, **kwargs): + if trainable: + with ops.device(self._worker_device): + local_var = getter( + name, + trainable=True, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + *args, + **kwargs) + if kwargs['reuse'] == True: + return local_var + global_center_variable = getter( + name='%s/%s' % (GLOBAL_VARIABLE_NAME, name), + trainable=False, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + *args, + **kwargs) + + with ops.device(self._worker_device): + grad_variable = getter( + name='%s/%s' % (GRAD_VARIABLE_NAME, name), + trainable=False, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + *args, + **kwargs) + if kwargs['partitioner'] is None: + self._grad_map[local_var] = grad_variable + self._global_map[local_var] = global_center_variable + else: + v_list = list(local_var) + for i in range(len(v_list)): + self._grad_map[v_list[i]] = list(grad_variable)[i] + self._global_map[v_list[i]] = list(global_center_variable)[i] + return local_var + else: + return getter(name, + trainable=trainable, + collections=collections, + *args, + **kwargs) + +class AGNOptimizer(optimizer.Optimizer): + """Wrapper that implements the Accumulated GradientNormalization algorithm. 
+ Reference: + Accumulated Gradient Normalization: Joeri Hermans ACML2017 + https://arxiv.org/abs/1710.02368 + """ + + def __init__(self, + optimizer, + num_worker, + custom_getter, + communication_period=10, + use_locking=True, + name='AGNOptimizer'): + """Construct a new AGN optimizer. + + Args: + optimizer: input optimizer, can be sgd/momentum/adam etc. + num_worker: The number of workers + custom_getter: The AGNCustomGetter + communication_period: An int point value to controls the frequency + of the communication between every worker and the ps. + use_locking: If True use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "AGNOptimizer". + + """ + super(AGNOptimizer, self).__init__(use_locking, name) + self._opt = optimizer + self._num_worker = num_worker + self._period = communication_period + self._global_map = custom_getter._global_map + self._grad_map = custom_getter._grad_map + self._local_step = variable_scope.get_variable( + initializer=0, + trainable=False, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + name='local_step') + self._opt._prepare() + + def compute_gradients(self, + loss, + var_list=None, + gate_gradients=optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None): + """Compute gradients of `loss` for the variables in `var_list`. + Args: + loss: A Tensor containing the value to minimize. + var_list: Optional list or tuple of `tf.Variable` to update to minimize + `loss`. Defaults to the list of variables collected in the graph + under the key `GraphKey.TRAINABLE_VARIABLES`. + gate_gradients: How to gate the computation of gradients. Can be + `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class `AggregationMethod`. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. 
+ grad_loss: Optional. A `Tensor` holding the gradient computed for `loss` + + Returns: + A list of (gradient, variable) pairs. Variable is always present, but + gradient can be `None`. + """ + if not var_list: + var_list = variables.trainable_variables() + return self._opt.compute_gradients(loss, + var_list, + gate_gradients, + aggregation_method, + colocate_gradients_with_ops, + grad_loss) + + def _adjust_optimizer_variable_collection(self, opt_vars): + """ Move optimizer created variables to local collection + """ + g = ops.get_default_graph() + idx = 0 + for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])): + var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] + name = var.op.name + if name in opt_vars: + ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var) + del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] + else: + idx += 1 + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Apply gradients to global variables. + + This is the second part of `minimize()`. It returns an `Operation` that + applies gradients. + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + `compute_gradients()`. + global_step: Optional `Variable` to increment by one after the + variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the `Optimizer` constructor. + + Returns: + An `Operation` that applies the specified gradients. If `global_step` + was not None, that operation also increments `global_step`. 
+ """ + local_vars = [v for g, v in grads_and_vars if g is not None] + grads = [g for g, v in grads_and_vars if g is not None] + # theta = theta - lr * grad + global_old = set(n.op.name for n in variables.global_variables()) + local_update_op = self._opt.apply_gradients(grads_and_vars) + global_new = set(n.op.name for n in variables.global_variables()) + + self._adjust_optimizer_variable_collection(global_new - global_old) + + # a = a + grad + update_ops = [] + update_ops.append(local_update_op) + grad_vars = [self._grad_map[var] for var in local_vars] + for g, grad_var in zip (grads, grad_vars): + update_ops.append(state_ops.assign_add(grad_var, g)) + + global_center_vars = [self._global_map[var] for var in local_vars] + + # update global variables. + def _Update_global_variables(): + global_norm = [] + # a = a / t + for g in grad_vars: + global_norm.append(state_ops.assign(g, g / self._period)) + # apply + with ops.control_dependencies(global_norm): + apply_global_op = self._opt.apply_gradients(zip(grad_vars, + global_center_vars)) + + # pull + with ops.control_dependencies([apply_global_op]): + update_ops = [] + if global_step: + with ops.colocate_with(global_step): + update_ops.append(state_ops.assign_add(global_step, 1)) + + for lvar in local_vars: + g_val = self._global_map[lvar].read_value() + update_ops.append(state_ops.assign(lvar, g_val)) + for grad_var in grad_vars: + update_ops.append(state_ops.assign(grad_var, + array_ops.zeros_like(grad_var))) + variable_update = control_flow_ops.group(*(update_ops)) + return variable_update + + local_update = state_ops.assign_add( + self._local_step, 1, name='local_step_update').op + + with ops.control_dependencies([local_update]): + condition = math_ops.equal( + math_ops.mod(self._local_step, self._period), 0) + with ops.control_dependencies(update_ops): + conditional_update = control_flow_ops.cond( + condition, _Update_global_variables, control_flow_ops.no_op) + return conditional_update + + def get_init_op(self, 
task_index): + """Returns the op to let all the local variables and local center + variables equal to the global center variables before the training begins + """ + init_ops = [] + local_vars = variables.trainable_variables() + global_center_vars = [self._global_map[var] for var in local_vars] + grad_vars = [self._grad_map[var] for var in local_vars] + if not (local_vars and global_center_vars and grad_vars): + raise ValueError('The lists of local_variables, global_center_variables,' + 'grad_center_variables should not be empty') + for lvar, gc_var in zip(local_vars, global_center_vars): + init_ops.append(state_ops.assign(gc_var, lvar)) + for g in grad_vars: + init_ops.append(state_ops.assign(g, array_ops.zeros_like(g))) + init_op = control_flow_ops.group(*(init_ops)) + return init_op + + def make_session_run_hook(self, is_chief, task_index): + """Creates a hook to handle AGNOptimizerHook ops such as initialization.""" + return _AGNOptimizerHook(self, is_chief, task_index) + + +class _AGNOptimizerHook(session_run_hook.SessionRunHook): + + def __init__(self, agn_optimizer, is_chief, task_index): + """Creates hook to handle AGNOptimizer initialization ops. + + Args: + agn_optimizer: `AGNOptimizer` which this hook will initialize. + is_chief: `Bool`, whether is this a chief replica or not. 
+ task_index: int, task_index of worker + """ + self._agn_optimizer = agn_optimizer + self._is_chief = is_chief + self._task_index = task_index + + def begin(self): + self._local_init_op = variables.local_variables_initializer() + self._global_init_op = None + if self._is_chief: + self._global_init_op = variables.global_variables_initializer() + self._variable_init_op = self._agn_optimizer.get_init_op(self._task_index) + + def after_create_session(self, session, coord): + """Run initialization ops""" + session.run(self._variable_init_op) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py new file mode 100644 index 0000000000..091943de02 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -0,0 +1,279 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Tests for AGNOptimizer.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import portpicker + +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import device_setter +from tensorflow.python.training import momentum +from tensorflow.python.training import server_lib +from tensorflow.python.training import training +from tensorflow.python.training import training_util + +from tensorflow.contrib.opt.python.training.agn_optimizer import \ + AGNOptimizer, AGNCustomGetter, GLOBAL_VARIABLE_NAME + + +def create_local_cluster(num_workers, num_ps, protocol="grpc"): + """Create local GRPC servers and return them.""" + worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] + ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] + cluster_dict = { + "worker": ["localhost:%s" % port for port in worker_ports], + "ps": ["localhost:%s" % port for port in ps_ports] + } + cs = server_lib.ClusterSpec(cluster_dict) + + workers = [ + server_lib.Server( + cs, job_name="worker", protocol=protocol, task_index=ix, start=True) + for ix in range(num_workers) + ] + ps_servers = [ + server_lib.Server( + cs, job_name="ps", protocol=protocol, task_index=ix, start=True) + for ix in range(num_ps) + ] + + return cluster_dict, workers, ps_servers + + +# Creates the workers and returns their sessions, graphs, train_ops. 
+# Chief worker will update last +def _get_workers(num_workers, period, workers, num_ps=1): + sessions = [] + graphs = [] + train_ops = [] + for worker_id in range(num_workers): + graph = ops.Graph() + is_chief = (worker_id == 0) + with graph.as_default(): + worker_device = "/job:worker/task:%d/cpu:0" % (worker_id) + ps_device = device_setter.replica_device_setter( + worker_device=worker_device, + ps_device="/job:ps/task:0/cpu:0", + ps_tasks=1) + agn_getter = AGNCustomGetter(worker_device=worker_device) + with variable_scope.variable_scope( + "", custom_getter=agn_getter), ops.device(ps_device): + global_step = training_util.get_or_create_global_step() + var_0 = variable_scope.get_variable(initializer=0.0, name="v0") + var_1 = variable_scope.get_variable(initializer=0.5, name="v1") + if num_ps > 1: + with variable_scope.variable_scope("", + partitioner=partitioned_variables.fixed_size_partitioner( + num_ps, axis=0), + custom_getter=agn_getter), ops.device(ps_device): + + partition_var = variable_scope.get_variable( + 'partition_var', + shape=[2, 4], + initializer=init_ops.zeros_initializer) + part_0 = list(partition_var)[0] + part_1 = list(partition_var)[1] + + with ops.device("/job:worker/task:" + str(worker_id)): + grads_0 = constant_op.constant(-1.0) + grads_1 = constant_op.constant(-1.0) + grads_part_0 = constant_op.constant([[-1., -1., -1., -1.]]) + grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]]) + + optimizer = \ + momentum.MomentumOptimizer(learning_rate=0.1, momentum=0.0) + opt = AGNOptimizer( + optimizer, + num_worker=num_workers, + communication_period=period, + custom_getter=agn_getter) + if num_ps == 1: + train_op = [ + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), + global_step) + ] + else: + train_op = [ + opt.apply_gradients(([grads_0, var_0], + [grads_1, var_1], + [grads_part_0, part_0], + [grads_part_1, part_1]), + global_step) + ] + hook = opt.make_session_run_hook(is_chief, worker_id) + # Creates MonitoredSession + sess 
= training.MonitoredTrainingSession( + workers[worker_id].target, hooks=[hook]) + + sessions.append(sess) + graphs.append(graph) + train_ops.append(train_op) + + return sessions, graphs, train_ops + + +class AGNOptimizerTest(test.TestCase): + + def _run(self, train_op, sess): + sess.run(train_op) + + def test1Workers2Period(self): + num_workers = 1 + communication_period = 4 + num_ps = 1 + _, workers, _ = create_local_cluster( + num_workers=num_workers, num_ps=num_ps) + + sessions, graphs, train_ops = _get_workers( + num_workers, communication_period, workers) + + var_0 = graphs[0].get_tensor_by_name("v0:0") + var_1 = graphs[0].get_tensor_by_name("v1:0") + global_step = training_util.get_global_step(graphs[0]) + var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") + var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + + # Verify the initialized value. + self.assertAllEqual(0.0, sessions[0].run(var_0)) + self.assertAllEqual(0.5, sessions[0].run(var_1)) + self.assertAllEqual(0.0, sessions[0].run(var_0_g)) + self.assertAllEqual(0.5, sessions[0].run(var_1_g)) + self.assertAllEqual(0, sessions[0].run(global_step)) + # step 0 + sessions[0].run(train_ops[0]) + self.assertNear(0.1, sessions[0].run(var_0), 1e-6) + self.assertNear(0.6, sessions[0].run(var_1), 1e-6) + self.assertAllEqual(0.0, sessions[0].run(var_0_g)) + self.assertAllEqual(0.5, sessions[0].run(var_1_g)) + self.assertAllEqual(0, sessions[0].run(global_step)) + + # 2 & 3 + sessions[0].run(train_ops[0]) + sessions[0].run(train_ops[0]) + self.assertNear(0.3, sessions[0].run(var_0), 1e-6) + self.assertNear(0.8, sessions[0].run(var_1), 1e-6) + + # 4 + sessions[0].run(train_ops[0]) + # pull + self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g)) + self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g)) + self.assertNear(0.1, sessions[0].run(var_0), 1e-6) + self.assertNear(0.6, sessions[0].run(var_1), 1e-6) + + sessions[0].run(train_ops[0]) + 
sessions[0].run(train_ops[0]) + sessions[0].run(train_ops[0]) + sessions[0].run(train_ops[0]) + self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g)) + self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g)) + self.assertNear(0.2, sessions[0].run(var_0), 1e-6) + self.assertNear(0.7, sessions[0].run(var_1), 1e-6) + + def test2Worker1Period(self): + num_workers = 2 + communication_period = 1 + num_ps = 2 + _, workers, _ = create_local_cluster( + num_workers=num_workers, num_ps=num_ps) + + sessions, graphs, train_ops = _get_workers( + num_workers, communication_period, workers, num_ps=2) + + var_0 = graphs[0].get_tensor_by_name("v0:0") + var_1 = graphs[0].get_tensor_by_name("v1:0") + + var_0_1 = graphs[1].get_tensor_by_name("v0:0") + var_1_1 = graphs[1].get_tensor_by_name("v1:0") + + var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") + var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + part_0_g = graphs[0].get_tensor_by_name( + GLOBAL_VARIABLE_NAME + "/partition_var/part_0:0") + part_1_g = graphs[0].get_tensor_by_name( + GLOBAL_VARIABLE_NAME + "/partition_var/part_1:0") + + # Verify the initialized value. 
+ self.assertAllEqual(0.0, sessions[0].run(var_0)) + self.assertAllEqual(0.5, sessions[0].run(var_1)) + self.assertAllEqual(0.0, sessions[1].run(var_0_1)) + self.assertAllEqual(0.5, sessions[1].run(var_1_1)) + self.assertAllEqual(0.0, sessions[0].run(var_0_g)) + self.assertAllEqual(0.5, sessions[0].run(var_1_g)) + + # verify each step + sessions[0].run(train_ops[0]) + self.assertNear(0.1, sessions[0].run(var_0_g), 1e-6) + self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], + sessions[0].run(part_0_g), + 1e-6) + self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], + sessions[0].run(part_1_g), + 1e-6) + + sessions[1].run(train_ops[1]) + self.assertNear(0.2, sessions[0].run(var_0_g), 1e-6) + self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], + sessions[0].run(part_0_g), + 1e-6) + self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], + sessions[0].run(part_1_g), + 1e-6) + + sessions[0].run(train_ops[0]) + sessions[1].run(train_ops[1]) + + sessions[0].run(train_ops[0]) + sessions[1].run(train_ops[1]) + self.assertNear(0.6, sessions[0].run(var_0_g), 1e-6) + self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], + sessions[0].run(part_0_g), + 1e-6) + self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], + sessions[0].run(part_1_g), + 1e-6) + + def testAGNCustomGetter(self): + cluster_spec = server_lib.ClusterSpec({ + "ps": ["ps0:2222", "ps1:2222"], + "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] + }) + agn_getter = AGNCustomGetter(worker_device="/job:worker/task:0") + from tensorflow.python.training import device_setter + with ops.device( + device_setter.replica_device_setter(cluster=cluster_spec, + worker_device="/job:worker/task:0", + ps_device="/job:ps")), \ + variable_scope.variable_scope("", custom_getter=agn_getter): + v = variable_scope.get_variable(initializer=[1, 2], name="v") + w = variable_scope.get_variable(initializer=[2, 1], name="w") + v_g, w_g = agn_getter._global_map[v], agn_getter._global_map[w] + self.assertDeviceEqual("/job:worker/task:0", v.device) + 
self.assertDeviceEqual("job:ps/task:0", v_g.device) + self.assertDeviceEqual("/job:worker/task:0", w.device) + self.assertDeviceEqual("job:ps/task:1", w_g.device) + + +if __name__ == "__main__": + test.main() -- GitLab From 44dc83c18dfb8fff5525422e6c08a468aca4fb65 Mon Sep 17 00:00:00 2001 From: weidankong Date: Fri, 24 Aug 2018 11:52:18 -0700 Subject: [PATCH 0053/1357] AGN: clear unused imports --- tensorflow/contrib/opt/python/training/agn_optimizer.py | 4 ---- tensorflow/contrib/opt/python/training/agn_optimizer_test.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index dc1f8d6347..dd058bc26e 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -16,12 +16,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import logging_ops @@ -31,7 +28,6 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import optimizer from tensorflow.python.training import session_run_hook -from tensorflow.python.training import gradient_descent GLOBAL_VARIABLE_NAME = 'global_center_variable' diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index 091943de02..4e2200fa1a 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ 
b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -19,13 +19,11 @@ from __future__ import print_function import portpicker -from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import device_setter from tensorflow.python.training import momentum -- GitLab From f8ee9799e6a72d4fe24f9fad76d6e6b1b3a01af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 07:03:07 +0800 Subject: [PATCH 0054/1357] ENH: raise exception if unsupported features/columns is given --- .../python/estimator/canned/boosted_trees.py | 9 +- .../estimator/canned/boosted_trees_test.py | 97 +++++++++++-------- 2 files changed, 63 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index f2a5b9178b..66784fad0c 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -204,6 +204,9 @@ def _generate_feature_name_mapping(sorted_feature_columns): Returns: feature_name_mapping: a list of feature names indexed by the feature ids. + + Raises: + ValueError: when unsupported features/columns are tried. 
""" names = [] for column in sorted_feature_columns: @@ -221,8 +224,12 @@ def _generate_feature_name_mapping(sorted_feature_columns): else: for num in range(categorical_column._num_buckets): # pylint:disable=protected-access names.append('{}:{}'.format(column.name, num)) - else: + elif isinstance(column, feature_column_lib._BucketizedColumn): names.append(column.name) + else: + raise ValueError( + 'For now, only bucketized_column and indicator_column is supported ' + 'but got: {}'.format(column)) return names diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 7620f73425..14c05e024d 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -892,6 +892,49 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'all empty or contain only a root node'): est.experimental_feature_importances(normalize=True) + def testNegativeFeatureImportances(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + # In order to generate a negative feature importances, + # We assign an invalid value -1 to tree_weights here. 
+ tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 1 + left_id: 1 + right_id: 2 + } + metadata { + gain: 5.0 + } + } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } + } + tree_weights: -1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=True) + def testFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) @@ -1015,48 +1058,18 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances) - def testNegativeFeatureImportances(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=1, - max_depth=5) - - # In order to generate a negative feature importances, - # We assign an invalid value -1 to tree_weights here. 
- tree_ensemble_text = """ - trees { - nodes { - bucketized_split { - feature_id: 1 - left_id: 1 - right_id: 2 - } - metadata { - gain: 5.0 - } - } - nodes { - leaf { - scalar: -0.34 - } - } - nodes { - leaf { - scalar: 1.34 - } - } - } - tree_weights: -1.0 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto( - est, tree_ensemble_text) - - with self.assertRaisesRegexp(AssertionError, 'non-negative'): - est.experimental_feature_importances(normalize=False) - - with self.assertRaisesRegexp(AssertionError, 'non-negative'): - est.experimental_feature_importances(normalize=True) + def testFeatureImportancesNamesForUnsupportedColumn(self): + numeric_col = feature_column.numeric_column( + 'continuous', dtype=dtypes.float32) + + with self.assertRaisesRegexp(ValueError, + 'only bucketized_column and indicator_column'): + _ = boosted_trees.BoostedTreesRegressor( + feature_columns=[numeric_col], + n_batches_per_layer=1, + n_trees=2, + learning_rate=1.0, + max_depth=1) class ModelFnTests(test_util.TensorFlowTestCase): -- GitLab From 7e91ec68c7df088c306cc56cce621aee7ff53c94 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 24 Aug 2018 22:13:21 -0700 Subject: [PATCH 0055/1357] Added more unit tests and upgraded to the device-less bridge. 
--- WORKSPACE | 6 ++++++ tensorflow/workspace.bzl | 20 +++++++++--------- third_party/ngraph/ngraph_tf.BUILD | 34 ++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 4af1a1e75f..15aa24f3c1 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -86,3 +86,9 @@ new_local_repository( build_file = "//third_party/ngraph:ngraph.BUILD", ) +new_local_repository( + name = "ngraph_tf", + path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph-tf", + build_file = "//third_party/ngraph:ngraph_tf.BUILD", +) + diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 951cb8a89d..a5dc95d609 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -865,16 +865,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), ) - tf_http_archive( - name = "ngraph_tf", - urls = [ - "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - ], - sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", - strip_prefix = "ngraph-tf-0.3.0-rc1", - build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), - ) + # tf_http_archive( + # name = "ngraph_tf", + # urls = [ + # "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", + # "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", + # ], + # sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", + # strip_prefix = "ngraph-tf-0.3.0-rc1", + # build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), + # ) ############################################################################## # BIND DEFINITIONS diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index d0231e468e..f40d2057e8 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ 
b/third_party/ngraph/ngraph_tf.BUILD @@ -10,26 +10,35 @@ load( cc_library( name = "ngraph_tf", srcs = [ + "src/ngraph_assign_clusters.h", + "src/ngraph_assign_clusters.cc", "src/ngraph_builder.h", "src/ngraph_builder.cc", - "src/ngraph_cluster.h", - "src/ngraph_cluster.cc", + "src/ngraph_capture_variables.h", + "src/ngraph_capture_variables.cc", + "src/ngraph_conversions.h", "src/ngraph_cluster_manager.h", "src/ngraph_cluster_manager.cc", - "src/ngraph_confirm_pass.cc", - "src/ngraph_device.cc", + "src/ngraph_deassign_clusters.h", + "src/ngraph_deassign_clusters.cc", "src/ngraph_encapsulate_op.cc", - "src/ngraph_encapsulate_pass.cc", + "src/ngraph_encapsulate_clusters.h", + "src/ngraph_encapsulate_clusters.cc", "src/ngraph_freshness_tracker.h", "src/ngraph_freshness_tracker.cc", - "src/ngraph_graph_rewrite_passes.cc", - "src/ngraph_liberate_pass.cc", - "src/ngraph_op_kernels.cc", - "src/ngraph_stub_ops.cc", + # "src/ngraph_liberate_pass.cc", + # "src/ngraph_op_kernels.cc", + # "src/ngraph_stub_ops.cc", + "src/ngraph_mark_for_clustering.h", + "src/ngraph_mark_for_clustering.cc", + "src/ngraph_rewrite_pass.cc", + "src/ngraph_rewrite_for_tracking.h", + "src/ngraph_rewrite_for_tracking.cc", + "src/ngraph_tracked_variable.cc", "src/ngraph_utils.h", "src/ngraph_utils.cc", - "src/ngraph_send_recv_ops.cc", - "src/ngraph_variable_ops.cc", + # "src/ngraph_send_recv_ops.cc", + # "src/ngraph_variable_ops.cc", "src/tf_graphcycles.cc", "logging/ngraph_log.h", "logging/ngraph_log.cc", @@ -60,6 +69,9 @@ tf_cc_test( size = "small", srcs = [ "test/tf_exec.cpp", + "test/conversions.cpp", + "test/padding.cpp", + "test/graph_rewrites/assign_clusters.cc", "test/main.cpp", ], deps = [ -- GitLab From 7a54c15804f7bb0d0c40fea5c84b1f4acee58bac Mon Sep 17 00:00:00 2001 From: Stefan Dyulgerov Date: Sat, 25 Aug 2018 13:18:11 +0300 Subject: [PATCH 0056/1357] upgraded protobuf to v.3.6.1 --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index f56fb35a0f..56a57a2340 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG v3.6.0) +set(PROTOBUF_TAG v3.6.1) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") -- GitLab From 607004e583ecbd9fb788aaf9b360a8d85cf167ac Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 27 Aug 2018 13:12:23 -0700 Subject: [PATCH 0057/1357] AGN: remove compute_gradient --- .../opt/python/training/agn_optimizer.py | 38 +------------------ 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index dd058bc26e..f47ef5acc5 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -19,9 +19,7 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -134,40 +132,6 @@ class AGNOptimizer(optimizer.Optimizer): name='local_step') self._opt._prepare() - def compute_gradients(self, - loss, - var_list=None, - gate_gradients=optimizer.Optimizer.GATE_OP, - aggregation_method=None, - colocate_gradients_with_ops=False, - grad_loss=None): - """Compute gradients of `loss` for the variables in `var_list`. 
- Args: - loss: A Tensor containing the value to minimize. - var_list: Optional list or tuple of `tf.Variable` to update to minimize - `loss`. Defaults to the list of variables collected in the graph - under the key `GraphKey.TRAINABLE_VARIABLES`. - gate_gradients: How to gate the computation of gradients. Can be - `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. - aggregation_method: Specifies the method used to combine gradient terms. - Valid values are defined in the class `AggregationMethod`. - colocate_gradients_with_ops: If True, try colocating gradients with - the corresponding op. - grad_loss: Optional. A `Tensor` holding the gradient computed for `loss` - - Returns: - A list of (gradient, variable) pairs. Variable is always present, but - gradient can be `None`. - """ - if not var_list: - var_list = variables.trainable_variables() - return self._opt.compute_gradients(loss, - var_list, - gate_gradients, - aggregation_method, - colocate_gradients_with_ops, - grad_loss) - def _adjust_optimizer_variable_collection(self, opt_vars): """ Move optimizer created variables to local collection """ @@ -268,7 +232,7 @@ class AGNOptimizer(optimizer.Optimizer): raise ValueError('The lists of local_variables, global_center_variables,' 'grad_center_variables should not be empty') for lvar, gc_var in zip(local_vars, global_center_vars): - init_ops.append(state_ops.assign(gc_var, lvar)) + init_ops.append(state_ops.assign(lvar, gc_var)) for g in grad_vars: init_ops.append(state_ops.assign(g, array_ops.zeros_like(g))) init_op = control_flow_ops.group(*(init_ops)) -- GitLab From 8d226fe074d18aadf98a869755e7d432341ba882 Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 27 Aug 2018 15:59:54 -0700 Subject: [PATCH 0058/1357] AGN: use variable_creator_scope to move variables from GLOBAL_VARIABLES to LOCAL VARIABLES --- .../contrib/opt/python/training/agn_optimizer.py | 15 ++++++++++----- .../opt/python/training/agn_optimizer_test.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 7 
deletions(-) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index f47ef5acc5..8f415c75b9 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -166,12 +166,17 @@ class AGNOptimizer(optimizer.Optimizer): """ local_vars = [v for g, v in grads_and_vars if g is not None] grads = [g for g, v in grads_and_vars if g is not None] + def _variable_creator(next_creator, collections, **kwargs): + if not collections: + collections = [ops.GraphKeys.LOCAL_VARIABLES] + elif ops.GraphKeys.GLOBAL_VARIABLES in collections: + collections = list(collections) + collections.append(ops.GraphKeys.LOCAL_VARIABLES) + collections.remove(ops.GraphKeys.GLOBAL_VARIABLES) + return next_creator(collections=collections, **kwargs) # theta = theta - lr * grad - global_old = set(n.op.name for n in variables.global_variables()) - local_update_op = self._opt.apply_gradients(grads_and_vars) - global_new = set(n.op.name for n in variables.global_variables()) - - self._adjust_optimizer_variable_collection(global_new - global_old) + with variable_scope.variable_creator_scope(_variable_creator): + local_update_op = self._opt.apply_gradients(grads_and_vars) # a = a + grad update_ops = [] diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index 4e2200fa1a..a2302d2f11 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -23,10 +23,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variables from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test from 
tensorflow.python.training import device_setter -from tensorflow.python.training import momentum +from tensorflow.python.training import adam from tensorflow.python.training import server_lib from tensorflow.python.training import training from tensorflow.python.training import training_util @@ -100,7 +101,7 @@ def _get_workers(num_workers, period, workers, num_ps=1): grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]]) optimizer = \ - momentum.MomentumOptimizer(learning_rate=0.1, momentum=0.0) + adam.AdamOptimizer(learning_rate=0.1, beta1=0.0, beta2=0.0) opt = AGNOptimizer( optimizer, num_worker=num_workers, @@ -152,6 +153,13 @@ class AGNOptimizerTest(test.TestCase): var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + # verify adam/beta variables not in global collection + with graphs[0].as_default(): + for ele in variables.global_variables(): + self.assertTrue(ele.op.name.find('beta') < 0) + if ele.op.name.find('global_center_variable') < 0: + self.assertTrue(ele.op.name.find('Adam') < 0) + # Verify the initialized value. 
self.assertAllEqual(0.0, sessions[0].run(var_0)) self.assertAllEqual(0.5, sessions[0].run(var_1)) -- GitLab From 540ca4a8755a3670920b49647860d085df834a00 Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 27 Aug 2018 17:03:47 -0700 Subject: [PATCH 0059/1357] AGN: fix Sanity test --- .../opt/python/training/agn_optimizer.py | 19 +--------- .../opt/python/training/agn_optimizer_test.py | 37 ++++++++++--------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index 8f415c75b9..9fb5be56e6 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -19,7 +19,6 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -132,20 +131,6 @@ class AGNOptimizer(optimizer.Optimizer): name='local_step') self._opt._prepare() - def _adjust_optimizer_variable_collection(self, opt_vars): - """ Move optimizer created variables to local collection - """ - g = ops.get_default_graph() - idx = 0 - for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])): - var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] - name = var.op.name - if name in opt_vars: - ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var) - del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] - else: - idx += 1 - def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Apply gradients to global variables. 
@@ -182,7 +167,7 @@ class AGNOptimizer(optimizer.Optimizer): update_ops = [] update_ops.append(local_update_op) grad_vars = [self._grad_map[var] for var in local_vars] - for g, grad_var in zip (grads, grad_vars): + for g, grad_var in zip(grads, grad_vars): update_ops.append(state_ops.assign_add(grad_var, g)) global_center_vars = [self._global_map[var] for var in local_vars] @@ -215,7 +200,7 @@ class AGNOptimizer(optimizer.Optimizer): return variable_update local_update = state_ops.assign_add( - self._local_step, 1, name='local_step_update').op + self._local_step, 1, name='local_step_update').op with ops.control_dependencies([local_update]): condition = math_ops.equal( diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index a2302d2f11..28732c2a1d 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -72,9 +72,9 @@ def _get_workers(num_workers, period, workers, num_ps=1): with graph.as_default(): worker_device = "/job:worker/task:%d/cpu:0" % (worker_id) ps_device = device_setter.replica_device_setter( - worker_device=worker_device, - ps_device="/job:ps/task:0/cpu:0", - ps_tasks=1) + worker_device=worker_device, + ps_device="/job:ps/task:0/cpu:0", + ps_tasks=1) agn_getter = AGNCustomGetter(worker_device=worker_device) with variable_scope.variable_scope( "", custom_getter=agn_getter), ops.device(ps_device): @@ -82,7 +82,8 @@ def _get_workers(num_workers, period, workers, num_ps=1): var_0 = variable_scope.get_variable(initializer=0.0, name="v0") var_1 = variable_scope.get_variable(initializer=0.5, name="v1") if num_ps > 1: - with variable_scope.variable_scope("", + with variable_scope.variable_scope( + "", partitioner=partitioned_variables.fixed_size_partitioner( num_ps, axis=0), custom_getter=agn_getter), ops.device(ps_device): @@ -109,12 +110,12 @@ def _get_workers(num_workers, period, workers, 
num_ps=1): custom_getter=agn_getter) if num_ps == 1: train_op = [ - opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), global_step) ] else: train_op = [ - opt.apply_gradients(([grads_0, var_0], + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1], [grads_part_0, part_0], [grads_part_1, part_1]), @@ -232,20 +233,20 @@ class AGNOptimizerTest(test.TestCase): sessions[0].run(train_ops[0]) self.assertNear(0.1, sessions[0].run(var_0_g), 1e-6) self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], - sessions[0].run(part_0_g), - 1e-6) + sessions[0].run(part_0_g), + 1e-6) self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], - sessions[0].run(part_1_g), - 1e-6) + sessions[0].run(part_1_g), + 1e-6) sessions[1].run(train_ops[1]) self.assertNear(0.2, sessions[0].run(var_0_g), 1e-6) self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], - sessions[0].run(part_0_g), - 1e-6) + sessions[0].run(part_0_g), + 1e-6) self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], - sessions[0].run(part_1_g), - 1e-6) + sessions[0].run(part_1_g), + 1e-6) sessions[0].run(train_ops[0]) sessions[1].run(train_ops[1]) @@ -254,11 +255,11 @@ class AGNOptimizerTest(test.TestCase): sessions[1].run(train_ops[1]) self.assertNear(0.6, sessions[0].run(var_0_g), 1e-6) self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], - sessions[0].run(part_0_g), - 1e-6) + sessions[0].run(part_0_g), + 1e-6) self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], - sessions[0].run(part_1_g), - 1e-6) + sessions[0].run(part_1_g), + 1e-6) def testAGNCustomGetter(self): cluster_spec = server_lib.ClusterSpec({ -- GitLab From 6b25c37daaa6a063b6b687252343db5453a84b8b Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Mon, 27 Aug 2018 19:15:36 -0700 Subject: [PATCH 0060/1357] Added new version of the bridge that supports deviceless operation. 
--- third_party/ngraph/ngraph_tf.BUILD | 7 ------- 1 file changed, 7 deletions(-) diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index f40d2057e8..c1221cc385 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -26,9 +26,6 @@ cc_library( "src/ngraph_encapsulate_clusters.cc", "src/ngraph_freshness_tracker.h", "src/ngraph_freshness_tracker.cc", - # "src/ngraph_liberate_pass.cc", - # "src/ngraph_op_kernels.cc", - # "src/ngraph_stub_ops.cc", "src/ngraph_mark_for_clustering.h", "src/ngraph_mark_for_clustering.cc", "src/ngraph_rewrite_pass.cc", @@ -37,8 +34,6 @@ cc_library( "src/ngraph_tracked_variable.cc", "src/ngraph_utils.h", "src/ngraph_utils.cc", - # "src/ngraph_send_recv_ops.cc", - # "src/ngraph_variable_ops.cc", "src/tf_graphcycles.cc", "logging/ngraph_log.h", "logging/ngraph_log.cc", @@ -58,7 +53,6 @@ cc_library( "-I external/ngraph_tf/src", "-I external/ngraph_tf/logging", "-I external/ngraph/src", - #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", ], alwayslink = 1, visibility = ["//visibility:public"], @@ -83,7 +77,6 @@ tf_cc_test( ], extra_copts = [ "-fexceptions ", - #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", "-I external/ngraph_tf/src", "-I external/ngraph_tf/logging", "-I external/ngraph/src", -- GitLab From ccb1af57af2532dfee1af73899d1970ac7a263e4 Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Tue, 28 Aug 2018 12:33:41 +0900 Subject: [PATCH 0061/1357] update golden & pylint --- .../python/kernel_tests/extract_volume_patches_op_test.py | 1 + tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py index 215474f6db..64757a3e07 100644 --- a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py +++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py @@ -54,6 +54,7 @@ class 
ExtractVolumePatches(test.TestCase): name="im2col_3d") self.assertAllClose(patches, out_tensor.eval()) + # pylint: disable=bad-whitespace def testKsize1x1x1Stride1x1x1(self): """Verifies that for 1x1x1 kernel the output equals the input.""" image = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6]) + 1 diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 4f19627691..ba928eba9e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1060,6 +1060,10 @@ tf_module { name: "extract_image_patches" argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "extract_volume_patches" + argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "eye" argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\'], " -- GitLab From eafc3914b0356e013b888fb103d20a76faf5ee5c Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Tue, 28 Aug 2018 20:49:09 +0900 Subject: [PATCH 0062/1357] change golden/v2/tensorflow.pbtxt Running the API compatibility test only checks for pbtxt files under directory v1. Manually added extract_volume_patches under v2 as extract_image_patches is registered under v2 as well. 
--- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 5eb42b4db3..f7e63978da 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -1060,6 +1060,10 @@ tf_module { name: "extract_image_patches" argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "extract_volume_patches" + argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "eye" argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\'], " -- GitLab From 40aee739c3d5c7aee63020f36b83aded09044efb Mon Sep 17 00:00:00 2001 From: weidankong Date: Tue, 28 Aug 2018 10:09:13 -0700 Subject: [PATCH 0063/1357] AGN: fix sanity failure --- .../contrib/opt/python/training/agn_optimizer_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index 28732c2a1d..fc291f829f 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -111,15 +111,15 @@ def _get_workers(num_workers, period, workers, num_ps=1): if num_ps == 1: train_op = [ opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), - global_step) + global_step) ] else: train_op = [ opt.apply_gradients(([grads_0, var_0], - [grads_1, var_1], - [grads_part_0, part_0], - [grads_part_1, part_1]), - global_step) + [grads_1, var_1], + [grads_part_0, part_0], + [grads_part_1, part_1]), + global_step) ] hook = 
opt.make_session_run_hook(is_chief, worker_id) # Creates MonitoredSession -- GitLab From 66b27b0f4c3541268007b251885f8db424147e66 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Tue, 28 Aug 2018 18:46:45 -0700 Subject: [PATCH 0064/1357] Added comments. --- third_party/ngraph/ngraph.BUILD | 2 -- third_party/ngraph/tbb.BUILD | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD index 3d9c3ac044..426d49c542 100644 --- a/third_party/ngraph/ngraph.BUILD +++ b/third_party/ngraph/ngraph.BUILD @@ -124,8 +124,6 @@ cc_library( "src/ngraph/pass/*.hpp", "src/ngraph/runtime/*.cpp", "src/ngraph/type/*.cpp", - #"src/ngraph/runtime/interpreter/*.cpp", - #"src/ngraph/runtime/interpreter/*.hpp", ]), deps = [ ":ngraph_headers", diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD index 7c760cb3b3..e2096e48af 100644 --- a/third_party/ngraph/tbb.BUILD +++ b/third_party/ngraph/tbb.BUILD @@ -3,8 +3,11 @@ licenses(["notice"]) # 3-Clause BSD exports_files(["LICENSE"]) # Taken from: https://github.com/rnburn/satyr/blob/master/bazel/tbb.BUILD -# License: MIT +# License for this BUILD file: MIT # See: https://github.com/rnburn/satyr/blob/master/LICENSE +# +# License for TBB: Apache 2.0 +# See: https://github.com/01org/tbb/blob/tbb_2018/LICENSE genrule( name = "build_tbb", -- GitLab From e93a9f9ccfd9c7a2419bf3fc1d7866765bbcfce3 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 28 Aug 2018 18:55:51 -0700 Subject: [PATCH 0065/1357] Update GPU occupancy checking to utilize CUDA's occupancy calculator functions -Replace references to the UnqueryableDeviceParams struct with calls to CUDA's built-in occupancy calculation functions -Update calls to the occupancy checking functions with the new changes -Changes should provide more long-term reliability and will remove the need to manually update hardcoded data values for new GPU architectures --- .../xla/service/gpu/partition_assignment.cc | 
9 +- .../stream_executor/cuda/cuda_gpu_executor.cc | 192 ++---------------- .../stream_executor/device_description.cc | 98 +++------ .../stream_executor/device_description.h | 73 ++----- 4 files changed, 61 insertions(+), 311 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc index cf9f102d31..375f68a159 100644 --- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc @@ -62,13 +62,8 @@ LaunchDimensions CalculateLaunchDimensions( // // * = - auto threads_per_core = device_desc.threads_per_core_limit(); - auto blocks_per_core = device_desc.blocks_per_core_limit(); - int64 threads_per_block; - if (threads_per_core != 0 && blocks_per_core != 0) { - threads_per_block = device_desc.threads_per_core_limit() / - device_desc.blocks_per_core_limit(); - } else { + int64 threads_per_block = device_desc.threads_per_block_limit(); + if (threads_per_block == 0) { static std::atomic log_count{0}; if (log_count.fetch_add(1) < 8) { LOG(WARNING) << "Attempting to calculate launch dimensions for GPU " diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index e30f50ea2a..39b0696c93 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -467,33 +467,26 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, return; } + int block_size = thread_dims.x * thread_dims.y * thread_dims.z; + const DeviceDescription &device_description = kernel.parent()->GetDeviceDescription(); - uint64 blocks_per_sm = CalculateOccupancy( - device_description, regs_per_thread, smem_per_block, thread_dims); - VLOG(2) << "Resident blocks per SM is " << blocks_per_sm; + const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel); + CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue(); - // To increase 
occupancy, there must be a sufficient number of blocks - // available to spread across the sm's at this new improved occupancy level. - int multiprocessor_count = device_description.core_count(); - int block_count = block_dims.x * block_dims.y * block_dims.z; - int available_blocks_per_sm = - port::MathUtil::CeilOfRatio(block_count, multiprocessor_count); - if (available_blocks_per_sm <= static_cast(blocks_per_sm)) { - VLOG(2) << "Occupancy is limited by number of blocks available per sm."; - return; - } + int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread, + smem_per_block, thread_dims, cufunc); + VLOG(2) << "Resident blocks per SM is " << blocks_per_sm; - uint64 improved_regs_per_thread = CalculateRegisterLimitForTargetOccupancy( - device_description, smem_per_block, thread_dims, blocks_per_sm + 1); - if (improved_regs_per_thread != 0) { - VLOG(2) << "Reducing register usage from " << regs_per_thread - << " to " << improved_regs_per_thread - << " could increase resident blocks per SM by one."; - } else { - VLOG(2) << "Resident blocks per SM cannot be increased by reducing " - "register usage."; + int suggested_threads = + CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread, + smem_per_block, thread_dims, cufunc); + if (suggested_threads != 0) { + VLOG(2) << "The cuda occupancy calculator reccommends using " + << suggested_threads + << " threads per block to acheive an occupancy of " << blocks_per_sm + << " blocks per SM."; } } @@ -980,144 +973,6 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) { #endif } -// Set of compute capability specific device parameters that cannot be -// queried from the driver API. These values instead are baked into a -// lookup table indexed by compute capability version. 
-struct UnqueryableDeviceParams { - int cc_major; - int cc_minor; - uint64 blocks_per_core_limit; - uint64 registers_per_core_limit; - uint64 registers_per_thread_limit; - uint64 warp_alloc_granularity; - uint64 register_alloc_granularity; - uint64 shared_memory_alloc_granularity; -}; - -// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities -// https://developer.download.nvidia.com/compute/cuda/CUDA_Occupancy_calculator.xls -static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = { - { - 2, 0, // compute capability (2.0) - 8, // blocks_per_core_limit - 32 * 1024, // registers_per_core_limit - 63, // registers_per_thread_limit - 2, // warp_alloc_granularity - 64, // register_alloc_granularity - 128, // shared_memory_alloc_granularity - }, - { - 2, 1, // compute capability (2.1) - 8, // blocks_per_core_limit - 32 * 1024, // registers_per_core_limit - 63, // registers_per_thread_limit - 2, // warp_alloc_granularity - 64, // register_alloc_granularity - 128, // shared_memory_alloc_granularity - }, - { - 3, 0, // compute capability (3.0) - 16, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 63, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 3, 2, // compute capability (3.2) - 16, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 3, 5, // compute capability (3.5) - 16, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 3, 7, // compute capability (3.7) - 16, // blocks_per_core_limit - 128 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 
4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 5, 0, // compute capability (5.0) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 5, 2, // compute capability (5.2) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 5, 3, // compute capability (5.3) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 6, 0, // compute capability (6.0) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 2, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 6, 1, // compute capability (6.1) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 6, 2, // compute capability (6.2) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - // TODO(jlebar): Confirm the alloc granularity values for sm_70. These are - // not published in the spreadsheet linked above. Currently we guess that - // they're the same as sm_60. 
- { - 7, 0, // compute capability (7.0) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 2, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, -}; DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { internal::DeviceDescriptionBuilder builder; @@ -1193,19 +1048,6 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_name(device_name); } - for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { - const auto ¶ms = kAllUnqueryableDeviceParams[i]; - if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) { - builder.set_blocks_per_core_limit(params.blocks_per_core_limit); - builder.set_registers_per_core_limit(params.registers_per_core_limit); - builder.set_registers_per_thread_limit(params.registers_per_thread_limit); - builder.set_warp_alloc_granularity(params.warp_alloc_granularity); - builder.set_register_alloc_granularity(params.register_alloc_granularity); - builder.set_shared_memory_alloc_granularity( - params.shared_memory_alloc_granularity); - } - } - builder.set_platform_version( port::StrCat("Compute Capability ", cc_major_, ".", cc_minor_)); @@ -1227,6 +1069,10 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { CUDADriver::GetMaxRegistersPerBlock(device_).ValueOrDie()); builder.set_threads_per_warp( CUDADriver::GetThreadsPerWarp(device_).ValueOrDie()); + builder.set_registers_per_core_limit( + CUDADriver::GetDeviceAttribute( + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, device_) + .ValueOrDie()); auto built = builder.Build(); return built.release(); diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index 8ca0677f8a..df52ce6cce 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -37,16 +37,11 @@ 
DeviceDescription::DeviceDescription() kUninitializedUint64), block_dim_limit_(kUninitializedUint64, kUninitializedUint64, kUninitializedUint64), - blocks_per_core_limit_(kUninitializedUint64), threads_per_core_limit_(kUninitializedUint64), threads_per_block_limit_(kUninitializedUint64), threads_per_warp_(kUninitializedUint64), registers_per_core_limit_(kUninitializedUint64), registers_per_block_limit_(kUninitializedUint64), - registers_per_thread_limit_(kUninitializedUint64), - warp_alloc_granularity_(1), - register_alloc_granularity_(1), - shared_memory_alloc_granularity_(1), device_address_bits_(kUninitializedUint64), device_memory_size_(kUninitializedUint64), memory_bandwidth_(kUninitializedUint64), @@ -162,75 +157,36 @@ static uint64 RoundDown(uint64 value, uint64 n) { return port::MathUtil::FloorOfRatio(value, n) * n; } -uint64 CalculateOccupancy(const DeviceDescription &device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim &thread_dims) { - // Don't try to compute occupancy if necessary values are not initialized. - uint64 required_fields[] = { device_description.registers_per_thread_limit(), - device_description.threads_per_warp(), - device_description.warp_alloc_granularity(), - device_description.register_alloc_granularity(), - device_description.registers_per_block_limit(), - device_description.shared_memory_per_core(), - device_description.blocks_per_core_limit() }; - for (auto value : required_fields) { - if (value == kUninitializedUint64) { - return 0; - } - } - - if (registers_per_thread > device_description.registers_per_thread_limit()) { - return 0; - } - - uint64 warps_per_block = - port::MathUtil::CeilOfRatio(thread_dims.x * thread_dims.y * thread_dims.z, - device_description.threads_per_warp()); - - // Warp resources are allocated at a particular granularity. This value is - // the effective number of warps for resource allocation purposes. 
- uint64 alloc_warps_per_block = - RoundUp(warps_per_block, device_description.warp_alloc_granularity()); - - uint64 alloc_regs_per_warp = - RoundUp(device_description.threads_per_warp() * registers_per_thread, - device_description.register_alloc_granularity()); - uint64 regs_per_block = alloc_warps_per_block * alloc_regs_per_warp; - uint64 reg_limit = - device_description.registers_per_block_limit() / regs_per_block; - - uint64 alloc_smem_per_block = RoundUp( - shared_memory_per_block, - device_description.shared_memory_alloc_granularity()); - uint64 smem_limit = alloc_smem_per_block > 0 ? - device_description.shared_memory_per_core() / alloc_smem_per_block : - device_description.blocks_per_core_limit(); - - uint64 thread_limit = device_description.threads_per_core_limit() - / (warps_per_block * device_description.threads_per_warp()); - - return std::min({ device_description.blocks_per_core_limit(), - reg_limit, smem_limit, thread_limit }); +int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + return suggested_blocks; } -uint64 CalculateRegisterLimitForTargetOccupancy( - const DeviceDescription &device_description, uint64 shared_memory_per_block, - const ThreadDim &thread_dims, uint64 target_blocks_per_core) { - // Linear search from maximum number of registers down until the target - // blocks per SM is found. - // TODO(meheff): Compute this using a closed form solution. 
- int reg_step = device_description.register_alloc_granularity() / - device_description.threads_per_warp(); - for (int r = device_description.registers_per_thread_limit(); r > 0; - r = RoundDown(r - 1, reg_step)) { - uint64 occupancy = CalculateOccupancy( - device_description, r, shared_memory_per_block, thread_dims); - if (occupancy >= target_blocks_per_core) { - return r; - } +int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + if (suggested_blocks > *initial_blocks) { + *initial_blocks = suggested_blocks; + return suggested_threads; + } else { + return 0; } - return 0; } } // namespace stream_executor diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index 7f99d81ef3..d335b9b875 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -24,6 +24,7 @@ limitations under the License. #include #include "tensorflow/stream_executor/platform/port.h" +#include "tensorflow/stream_executor/cuda/cuda_driver.h" #include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/platform/port.h" @@ -79,10 +80,6 @@ class DeviceDescription { // legitimate kernel launch request. const BlockDim &block_dim_limit() const { return block_dim_limit_; } - // Returns the limit on the number of simultaneously resident blocks - // on a multiprocessor. - uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; } - // Returns the limit on the total number of threads that can be launched in a // single block; i.e. the limit on x * y * z dimensions of a ThreadDim. 
// This limit affects what constitutes a legitimate kernel launch request. @@ -110,27 +107,6 @@ class DeviceDescription { return registers_per_block_limit_; } - // Returns the limit on the total number of registers that can be - // allocated to a thread. - const uint64 ®isters_per_thread_limit() const { - return registers_per_thread_limit_; - } - - // Returns the granularity at which warps are allocated resources. - const uint64 &warp_alloc_granularity() const { - return warp_alloc_granularity_; - } - - // Returns the granularity at which registers are allocated to warps. - const uint64 ®ister_alloc_granularity() const { - return register_alloc_granularity_; - } - - // Returns the granularity at which shared memory is allocated to warps. - const uint64 &shared_memory_alloc_granularity() const { - return shared_memory_alloc_granularity_; - } - // Returns the number of address bits available to kernel code running on the // platform. This affects things like the maximum allocation size and perhaps // types used in kernel code such as size_t. 
@@ -200,19 +176,12 @@ class DeviceDescription { ThreadDim thread_dim_limit_; BlockDim block_dim_limit_; - uint64 blocks_per_core_limit_; - uint64 threads_per_core_limit_; uint64 threads_per_block_limit_; uint64 threads_per_warp_; uint64 registers_per_core_limit_; uint64 registers_per_block_limit_; - uint64 registers_per_thread_limit_; - - uint64 warp_alloc_granularity_; - uint64 register_alloc_granularity_; - uint64 shared_memory_alloc_granularity_; uint64 device_address_bits_; uint64 device_memory_size_; @@ -270,10 +239,6 @@ class DeviceDescriptionBuilder { device_description_->block_dim_limit_ = value; } - void set_blocks_per_core_limit(uint64 value) { - device_description_->blocks_per_core_limit_ = value; - } - void set_threads_per_core_limit(uint64 value) { device_description_->threads_per_core_limit_ = value; } @@ -290,19 +255,6 @@ class DeviceDescriptionBuilder { void set_registers_per_block_limit(uint64 value) { device_description_->registers_per_block_limit_ = value; } - void set_registers_per_thread_limit(uint64 value) { - device_description_->registers_per_thread_limit_ = value; - } - - void set_warp_alloc_granularity(uint64 value) { - device_description_->warp_alloc_granularity_ = value; - } - void set_register_alloc_granularity(uint64 value) { - device_description_->register_alloc_granularity_ = value; - } - void set_shared_memory_alloc_granularity(uint64 value) { - device_description_->shared_memory_alloc_granularity_ = value; - } void set_device_address_bits(uint64 value) { device_description_->device_address_bits_ = value; @@ -375,17 +327,18 @@ void CalculateDimensionality(const DeviceDescription &device_description, // Compute and return maximum blocks per core (occupancy) based on the // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. 
-uint64 CalculateOccupancy(const DeviceDescription &device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim &thread_dims); - -// Compute and return the maximum number of registers per thread which -// achieves the target occupancy. If the target is not possible then -// zero is returned. -uint64 CalculateRegisterLimitForTargetOccupancy( - const DeviceDescription &device_description, uint64 shared_memory_per_block, - const ThreadDim &thread_dims, uint64 target_blocks_per_core); +int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + +// Compute and return the suggested thread count to acheive ideal occupancy. +// If the provided thread dimensions match this number, zero is returned. +int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); } // namespace stream_executor -- GitLab From 4e72dd865a3fc83baa69f6b7c08720a1b546a464 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 29 Aug 2018 17:05:43 +0800 Subject: [PATCH 0066/1357] Refine LeakyRelu codes. 1. Add C++ gradient of gradient definition of LeakyReLu and revalant UT. 2. Using forward compatibility layer for python code changes. 
--- tensorflow/cc/gradients/nn_grad.cc | 18 ++++- tensorflow/cc/gradients/nn_grad_test.cc | 16 +++++ .../python/kernel_tests/relu_op_test.py | 70 ++++++++++--------- tensorflow/python/ops/nn_ops.py | 5 +- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 0fc23d0bf7..2a32a2ed6f 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -149,13 +149,27 @@ Status LeakyReluGradHelper(const Scope& scope, const Operation& op, float alpha; TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); internal::LeakyReluGrad::Attrs attrs; - attrs.Alpha(alpha); - auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs); + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), + attrs.Alpha(alpha)); grad_outputs->push_back(dx); return scope.status(); } REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); +Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index 5ebece7b6e..bf0db1f59d 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/cc/framework/gradient_checker.h" #include "tensorflow/cc/framework/testutil.h" #include "tensorflow/cc/gradients/grad_testutil.h" +#include "tensorflow/cc/ops/nn_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -173,6 +174,21 @@ TEST_F(NNGradTest, LeakyReluGrad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGradGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, + {5, 2}); + Tensor features = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + auto y = ops::internal::LeakyReluGrad(scope_, x, features); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index ccb3a231bb..7066f28883 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.compat import compat from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -283,8 +284,9 @@ class LeakyReluTest(test.TestCase): np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), alpha=0.1, use_gpu=True) - # The gradient test for ReLU is a bit tricky as the derivative is not well - # defined at around zero and we want to avoid that in 
terms of input values. + # The gradient test for Leaky ReLU is a bit tricky as the derivative is not + # well defined at around zero and we want to avoid that in terms of input + # values. def testGradientFloat32(self): with self.test_session(): x = constant_op.constant( @@ -319,39 +321,41 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradGradFloat32(self): - with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - name="x") - y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float32, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) - print("leaky_relu (float32) gradient of gradient err = ", err) - self.assertLess(err, 1e-4) + with compat.forward_compatibility_horizon(2018, 10, 2): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient of gradient err = ", err) + self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - dtype=dtypes.float64, - name="x") - y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float64, - order="F") - err = 
gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) - print("leaky_relu (float64) gradient of gradient err = ", err) - self.assertLess(err, 1e-10) + with compat.forward_compatibility_horizon(2018, 10, 2): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient of gradient err = ", err) + self.assertLess(err, 1e-10) def testGradientScalar(self): with self.test_session() as sess: diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 31b8f3945d..52ea202636 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,7 +1601,10 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) + if compat.forward_compatible(2018, 10, 1): + return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) + alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") + return math_ops.maximum(alpha * features, features, name=name) def _flatten_outer_dims(logits): -- GitLab From bb45e28b207f9a0d56f1b4a0d372b267e216ad04 Mon Sep 17 00:00:00 2001 From: Naurril Date: Wed, 29 Aug 2018 22:45:38 +0800 Subject: [PATCH 0067/1357] Code formatted --- tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc 
b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc index 0f853ae52a..6af4ca4d96 100644 --- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -51,7 +51,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass { for (Node* n : matches) { AttrSlice n_attrs = n->attrs(); auto base_make_node = [n, &n_attrs](const string& op, - const string& name) { + const string& name) { NodeBuilder node_builder(name, op); node_builder.Device(n->requested_device()); string colo; -- GitLab From 1b166c7e6f30bf7179f31764b3615e63025a7472 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Fri, 20 Jul 2018 19:03:55 +0000 Subject: [PATCH 0068/1357] Rename CUDA GPU ID to platform GPU ID Rename CUDA GPU ID to platform GPU ID so the notion is applicable on both CUDA and ROCm platform. --- .../contrib/tensorrt/convert/convert_graph.cc | 8 +- .../contrib/tensorrt/kernels/trt_engine_op.cc | 13 +- .../common_runtime/gpu/gpu_bfc_allocator.cc | 11 +- .../common_runtime/gpu/gpu_bfc_allocator.h | 6 +- .../gpu/gpu_bfc_allocator_test.cc | 30 +-- .../gpu/gpu_cudamalloc_allocator.cc | 5 +- .../gpu/gpu_cudamalloc_allocator.h | 2 +- .../common_runtime/gpu/gpu_debug_allocator.cc | 10 +- .../common_runtime/gpu/gpu_debug_allocator.h | 4 +- .../gpu/gpu_debug_allocator_test.cc | 59 ++--- .../core/common_runtime/gpu/gpu_device.cc | 224 ++++++++++-------- .../core/common_runtime/gpu/gpu_device.h | 22 +- .../common_runtime/gpu/gpu_device_test.cc | 19 +- tensorflow/core/common_runtime/gpu/gpu_id.h | 32 +-- .../core/common_runtime/gpu/gpu_id_manager.cc | 38 +-- .../core/common_runtime/gpu/gpu_id_manager.h | 12 +- .../common_runtime/gpu/gpu_id_manager_test.cc | 32 +-- .../core/common_runtime/gpu/gpu_id_utils.h | 37 +-- .../common_runtime/gpu/gpu_process_state.cc | 15 +- .../core/grappler/clusters/single_machine.cc | 6 +- tensorflow/core/grappler/clusters/utils.cc | 13 +- 
tensorflow/core/grappler/clusters/utils.h | 2 +- .../core/grappler/clusters/utils_test.cc | 22 +- tensorflow/core/grappler/costs/utils.cc | 8 +- tensorflow/core/protobuf/config.proto | 2 +- 25 files changed, 333 insertions(+), 299 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b019c99882..f29f4d6deb 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -780,12 +780,12 @@ std::pair GetDeviceAndAllocator( // If device is not set, use the first found GPU device for the conversion. for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) { TfGpuId tf_gpu_id(tf_gpu_id_value); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (s.ok()) { VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device " - << cuda_gpu_id.value(); - cuda_device_id = cuda_gpu_id.value(); + << platform_gpu_id.value(); + cuda_device_id = platform_gpu_id.value(); GPUOptions gpu_options; // If the TF to Cuda gpu id mapping exist, the device and corresponding // allocator must have been initialized already, so the diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 2b42d81f47..88cf8d5980 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -565,21 +565,22 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources( new TRTInt8Calibrator(device_buffers_, batch_size, name())); const string label(name()); auto segment_graph = &segment_graph_; - const int cuda_gpu_id = ctx->device()->tensorflow_gpu_device_info()->gpu_id; - if (cuda_gpu_id < 0) { + const int platform_gpu_id = + 
ctx->device()->tensorflow_gpu_device_info()->gpu_id; + if (platform_gpu_id < 0) { LOG(ERROR) << "Can't get gpu_device_info from context->device()"; return tensorflow::errors::InvalidArgument( "Context->device doesn't contain device info!"); } const int64 workspace_size_bytes = workspace_size_; cres->thr_.reset(new std::thread([cres, label, segment_graph, shapes, - cuda_gpu_id, workspace_size_bytes]() { - VLOG(0) << "Starting calibration thread on device " << cuda_gpu_id + platform_gpu_id, workspace_size_bytes]() { + VLOG(0) << "Starting calibration thread on device " << platform_gpu_id << ", Calibration Resource @ " << cres; - auto err = cudaSetDevice(cuda_gpu_id); + auto err = cudaSetDevice(platform_gpu_id); if (err != cudaSuccess) { // TODO(aaroey): should return error here. - LOG(ERROR) << "Couldn't set cuda device to " << cuda_gpu_id + LOG(ERROR) << "Couldn't set cuda device to " << platform_gpu_id << " in calibration thread"; } // ConvertGraphDefToEngine() will try to build the engine. This thread diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc index 2d4c8d0201..c8db384b64 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc @@ -22,16 +22,17 @@ limitations under the License. 
namespace tensorflow { -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const string& name) - : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {} +GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id, + size_t total_memory, const string& name) + : GPUBFCAllocator(platform_gpu_id, total_memory, GPUOptions(), name) {} -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, +GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id, + size_t total_memory, const GPUOptions& gpu_options, const string& name) : BFCAllocator( new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(), gpu_options.per_process_gpu_memory_fraction() > 1.0 || gpu_options.experimental().use_unified_memory()), total_memory, gpu_options.allow_growth(), name) {} diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index f1cc2eace1..435ffb4959 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -35,11 +35,11 @@ namespace tensorflow { // algorithm. class GPUBFCAllocator : public BFCAllocator { public: - // 'cuda_gpu_id' refers to the ID of the GPU device within + // 'platform_gpu_id' refers to the ID of the GPU device within // the process and must reference a valid ID in the process. 
- GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory, const string& name); - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory, const GPUOptions& gpu_options, const string& name); virtual ~GPUBFCAllocator() {} diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 67caeb3495..518ccba580 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -46,7 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use, } TEST(GPUBFCAllocatorTest, NoDups) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); // Allocate a lot of raw pointers @@ -75,7 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) { } TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); // Allocate 256 raw pointers of sizes between 100 bytes and about // a meg random::PhiloxRandom philox(123, 17); @@ -133,7 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { } TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); float* first_ptr = a.Allocate(1024); @@ -168,18 +168,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { } TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); float* ptr = a.Allocate(0); EXPECT_EQ(nullptr, ptr); } TEST(GPUBFCAllocatorTest, TracksSizes) { - GPUBFCAllocator 
a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, a.AllocatedSize(t1)); @@ -188,7 +188,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) { // Configure a 1MiB byte limit - GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 20, "GPU_0_bfc"); float* first_ptr = a.Allocate(1 << 6); float* second_ptr = a.Allocate(1 << 20); @@ -203,7 +203,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { options.set_allow_growth(true); // Max of 2GiB, but starts out small. - GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1LL << 31, options, "GPU_0_bfc"); // Allocate 10 raw pointers of sizes between 100 bytes and about // 64 megs. 
@@ -264,8 +264,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { } TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { - GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); - GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc"); + GPUBFCAllocator b(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc"); void* amem = a.AllocateRaw(1, 1); void* bmem = b.AllocateRaw(1, 1 << 30); a.DeallocateRaw(amem); @@ -273,7 +273,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { } static void BM_Allocation(int iters) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 524288, 512, 1048576, 10485760, 104857600, @@ -289,7 +289,7 @@ static void BM_Allocation(int iters) { BENCHMARK(BM_Allocation); static void BM_AllocationThreaded(int iters, int num_threads) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc"); thread::ThreadPool pool(Env::Default(), "test", num_threads); std::atomic_int_fast32_t count(iters); mutex done_lock; @@ -325,7 +325,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16); // A more complex benchmark that defers deallocation of an object for // "delay" allocations. static void BM_AllocationDelayed(int iters, int delay) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 4096, 512, 1024, 1024}; int size_index = 0; @@ -363,7 +363,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { // only methods inside this class can access private members of BFCAllocator. 
void TestBinDebugInfo() { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); std::vector initial_ptrs; std::vector initial_ptrs_allocated_sizes; @@ -441,7 +441,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { } void TestLog2FloorNonZeroSlow() { - GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0)); EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1)); EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc index 934a57a5fb..553a5628ad 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc @@ -28,9 +28,10 @@ limitations under the License. namespace tensorflow { GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id) + PlatformGpuId platform_gpu_id) : base_allocator_(allocator) { - stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + stream_exec_ = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); } GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; } diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 856fdc34b4..8f38cc5a18 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -32,7 +32,7 @@ namespace tensorflow { class GPUcudaMallocAllocator : public VisitableAllocator { public: explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + PlatformGpuId platform_gpu_id); ~GPUcudaMallocAllocator() override; string Name() 
override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index e4c834b30d..badb021aa5 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -74,9 +74,10 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { // GPUDebugAllocator // ----------------------------------------------------------------------------- GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id) + PlatformGpuId platform_gpu_id) : base_allocator_(allocator) { - stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + stream_exec_ = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); } GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; } @@ -159,9 +160,10 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) { // GPUNanResetAllocator // ----------------------------------------------------------------------------- GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id) + PlatformGpuId platform_gpu_id) : base_allocator_(allocator) { - stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + stream_exec_ = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); } GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; } diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index 0f9b72040c..9e007ed8c1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -34,7 +34,7 @@ namespace tensorflow { class GPUDebugAllocator : public VisitableAllocator { public: explicit GPUDebugAllocator(VisitableAllocator* allocator, - 
CudaGpuId cuda_gpu_id); + PlatformGpuId platform_gpu_id); ~GPUDebugAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; @@ -66,7 +66,7 @@ class GPUDebugAllocator : public VisitableAllocator { class GPUNanResetAllocator : public VisitableAllocator { public: explicit GPUNanResetAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + PlatformGpuId platform_gpu_id); ~GPUNanResetAllocator() override; string Name() override { return "gpu_nan_reset"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index 236a0afa0b..bc3e3a8c35 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -34,10 +34,11 @@ namespace tensorflow { namespace { TEST(GPUDebugAllocatorTest, OverwriteDetection_None) { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); - auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); + auto stream_exec = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); for (int s : {8}) { std::vector cpu_array(s); @@ -58,11 +59,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) { for (int s : {8, 211}) { EXPECT_DEATH( { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); auto stream_exec = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + 
GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(s); memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64)); @@ -91,11 +92,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { for (int s : {8, 22}) { EXPECT_DEATH( { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); auto stream_exec = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(s); memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64)); @@ -121,10 +122,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { } TEST(GPUDebugAllocatorTest, ResetToNan) { - const CudaGpuId cuda_gpu_id(0); - GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); - auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + const PlatformGpuId platform_gpu_id(0); + GPUNanResetAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); + auto stream_exec = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(1024); std::vector cpu_array_result(1024); @@ -161,13 +163,14 @@ TEST(GPUDebugAllocatorTest, ResetToNan) { } TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { - const CudaGpuId cuda_gpu_id(0); + const PlatformGpuId platform_gpu_id(0); // NaN reset must be the outer-most allocator. 
GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id), - cuda_gpu_id); - auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id), + platform_gpu_id); + auto stream_exec = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(1024); std::vector cpu_array_result(1024); @@ -204,18 +207,18 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { } TEST(GPUDebugAllocatorTest, TracksSizes) { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUDebugAllocatorTest, AllocatedVsRequested) { - const CudaGpuId cuda_gpu_id(0); + const PlatformGpuId platform_gpu_id(0); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id), - cuda_gpu_id); + new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id), + platform_gpu_id); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, a.AllocatedSize(t1)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 2763ac0d4a..4bf23bc017 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -105,9 +105,9 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface { reinterpret_cast(scratch + Eigen::kCudaScratchSize); stream_ = cuda_stream; allocator_ = alloc; - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id.value()]; + PlatformGpuId 
platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); + device_prop_ = &Eigen::m_deviceProperties[platform_gpu_id.value()]; } const cudaStream_t& stream() const override { return *stream_; } @@ -332,9 +332,10 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { gpu_device_info_->stream = streams_[0]->compute; gpu_device_info_->default_context = device_contexts_[0]; gpu_device_info_->event_mgr = em_.get(); - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id)); - gpu_device_info_->gpu_id = cuda_gpu_id.value(); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id)); + gpu_device_info_->gpu_id = platform_gpu_id.value(); set_tensorflow_gpu_device_info(gpu_device_info_); // Whether and how the GPU device uses its own threadpool. @@ -690,9 +691,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice { Eigen::GpuDevice device_; }; -// Parse 'visible_device_list' into a list of CUDA GPU ids. +// Parse 'visible_device_list' into a list of platform GPU ids. Status ParseVisibleDeviceList(const string& visible_device_list, - std::vector* visible_gpu_order) { + std::vector* visible_gpu_order) { visible_gpu_order->clear(); se::Platform* gpu_manager = GPUMachineManager(); @@ -707,26 +708,28 @@ Status ParseVisibleDeviceList(const string& visible_device_list, } else { const std::vector order_str = str_util::Split(visible_device_list, ','); - for (const string& cuda_gpu_id_str : order_str) { - int32 cuda_gpu_id; - if (!strings::safe_strto32(cuda_gpu_id_str, &cuda_gpu_id)) { + for (const string& platform_gpu_id_str : order_str) { + int32 platform_gpu_id; + if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) { return errors::InvalidArgument( "Could not parse entry in 'visible_device_list': '", - cuda_gpu_id_str, "'. visible_device_list = ", visible_device_list); + platform_gpu_id_str, "'. 
visible_device_list = ", + visible_device_list); } - if (cuda_gpu_id < 0 || cuda_gpu_id >= gpu_manager->VisibleDeviceCount()) { + if (platform_gpu_id < 0 || + platform_gpu_id >= gpu_manager->VisibleDeviceCount()) { return errors::InvalidArgument( - "'visible_device_list' listed an invalid GPU id '", cuda_gpu_id, + "'visible_device_list' listed an invalid GPU id '", platform_gpu_id, "' but visible device count is ", gpu_manager->VisibleDeviceCount()); } - visible_gpu_order->push_back(CudaGpuId(cuda_gpu_id)); + visible_gpu_order->push_back(PlatformGpuId(platform_gpu_id)); } } // Validate no repeats. - std::set visible_device_set(visible_gpu_order->begin(), - visible_gpu_order->end()); + std::set visible_device_set(visible_gpu_order->begin(), + visible_gpu_order->end()); if (visible_device_set.size() != visible_gpu_order->size()) { return errors::InvalidArgument( "visible_device_list contained a duplicate entry: ", @@ -737,8 +740,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list, Status VerifyVirtualDeviceSettings( const size_t num_gpus_to_use, const GPUOptions& gpu_options, - const std::vector& visible_gpu_order, - const std::vector& valid_cuda_gpu_ids) { + const std::vector& visible_gpu_order, + const std::vector& valid_platform_gpu_ids) { const auto& virtual_devices = gpu_options.experimental().virtual_devices(); CHECK(!virtual_devices.empty()); if (gpu_options.per_process_gpu_memory_fraction() > 0) { @@ -760,11 +763,11 @@ Status VerifyVirtualDeviceSettings( " #GPUs in visible_device_list: ", visible_gpu_order.size(), " virtual_devices.size(): ", virtual_devices.size()); } - if (valid_cuda_gpu_ids.size() != virtual_devices.size()) { + if (valid_platform_gpu_ids.size() != virtual_devices.size()) { return errors::Unknown( "The number of valid GPUs doesn't match the number of elements in " "the virtual_devices list.", - " #valid GPUs: ", valid_cuda_gpu_ids.size(), + " #valid GPUs: ", valid_platform_gpu_ids.size(), " virtual_devices.size(): ", 
virtual_devices.size()); } return Status::OK(); @@ -806,18 +809,18 @@ int64 MinSystemMemory(int64 available_memory) { } // Get the memory limit for the virtual device being created on GPU with -// 'cuda_gpu_id', when that virtual device is the only virtual device being +// 'platform_gpu_id', when that virtual device is the only virtual device being // created on that GPU. Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options, - CudaGpuId cuda_gpu_id, + PlatformGpuId platform_gpu_id, int64* memory_limit) { int64 total_memory = 0; int64 available_memory = 0; se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) { return errors::Unknown("Failed to query available memory for GPU ", - cuda_gpu_id.value()); + platform_gpu_id.value()); } int64 allocated_memory = 0; @@ -916,8 +919,8 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, num_gpus_to_use = iter->second; } const auto& gpu_options = options.config.gpu_options(); - std::vector visible_gpu_order; - std::vector valid_cuda_gpu_ids; + std::vector visible_gpu_order; + std::vector valid_platform_gpu_ids; // If we aren't going to use any GPUs, don't initialize them. 
// We don't want to call ParseVisibleDeviceList if num_gpus_to_use is 0, // because it treats an empty gpu_options.visible_device_list as 'all GPUs are @@ -926,12 +929,12 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(), &visible_gpu_order)); TF_RETURN_IF_ERROR( - GetValidDeviceIds(visible_gpu_order, &valid_cuda_gpu_ids)); + GetValidDeviceIds(visible_gpu_order, &valid_platform_gpu_ids)); } - if (num_gpus_to_use > valid_cuda_gpu_ids.size()) { - num_gpus_to_use = valid_cuda_gpu_ids.size(); + if (num_gpus_to_use > valid_platform_gpu_ids.size()) { + num_gpus_to_use = valid_platform_gpu_ids.size(); } - if (!valid_cuda_gpu_ids.empty()) { + if (!valid_platform_gpu_ids.empty()) { // Save the original device. int original_device = 0; cudaError_t err = cudaGetDevice(&original_device); @@ -941,17 +944,18 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, } // Force to implicitly initialize CUDA runtime on each valid GPU before // CreateGPUDevice(). - for (CudaGpuId cuda_gpu_id : valid_cuda_gpu_ids) { - err = cudaSetDevice(cuda_gpu_id.value()); + for (PlatformGpuId platform_gpu_id : valid_platform_gpu_ids) { + err = cudaSetDevice(platform_gpu_id.value()); if (err != cudaSuccess) { - return errors::Internal("cudaSetDevice() on GPU:", cuda_gpu_id.value(), - " failed. Status: ", cudaGetErrorString(err)); + return errors::Internal("cudaSetDevice() on GPU:", + platform_gpu_id.value(), " failed. Status: ", + cudaGetErrorString(err)); } err = cudaFree(nullptr); if (err != cudaSuccess) { - return errors::Internal( - "CUDA runtime implicit initialization on GPU:", cuda_gpu_id.value(), - " failed. Status: ", cudaGetErrorString(err)); + return errors::Internal("CUDA runtime implicit initialization on GPU:", + platform_gpu_id.value(), " failed. Status: ", + cudaGetErrorString(err)); } } // Reset to the original device. 
@@ -977,10 +981,10 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, LOG(INFO) << line_buf; for (int i = 0; i < visible_gpu_order.size(); ++i) { line_buf = strings::StrCat(visible_gpu_order[i].value(), ": "); - CudaGpuId cuda_id_i = visible_gpu_order[i]; + PlatformGpuId gpu_id_i = visible_gpu_order[i]; for (int j = 0; j < visible_gpu_order.size(); ++j) { - CudaGpuId cuda_id_j = visible_gpu_order[j]; - if (im.directed_links.find({cuda_id_i, cuda_id_j}) != + PlatformGpuId gpu_id_j = visible_gpu_order[j]; + if (im.directed_links.find({gpu_id_i, gpu_id_j}) != im.directed_links.end()) { line_buf.append("Y "); } else { @@ -993,22 +997,23 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, const auto& virtual_devices = gpu_options.experimental().virtual_devices(); if (!virtual_devices.empty()) { - TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings( - num_gpus_to_use, gpu_options, visible_gpu_order, valid_cuda_gpu_ids)); + TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(num_gpus_to_use, gpu_options, + visible_gpu_order, + valid_platform_gpu_ids)); // We've verified that num_gpus_to_use >= virtual_devices.size(). 
num_gpus_to_use = virtual_devices.size(); CHECK(gpu_options.visible_device_list().empty() || - valid_cuda_gpu_ids == visible_gpu_order); + valid_platform_gpu_ids == visible_gpu_order); } int next_tf_gpu_id = 0; std::vector memory_limit_bytes; for (int i = 0; i < num_gpus_to_use; ++i) { - const CudaGpuId cuda_gpu_id = valid_cuda_gpu_ids[i]; + const PlatformGpuId platform_gpu_id = valid_platform_gpu_ids[i]; if (virtual_devices.empty() || virtual_devices.Get(i).memory_limit_mb_size() == 0) { int64 single_virtual_device_memory_limit = 0; TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit( - gpu_options, cuda_gpu_id, &single_virtual_device_memory_limit)); + gpu_options, platform_gpu_id, &single_virtual_device_memory_limit)); memory_limit_bytes.push_back(single_virtual_device_memory_limit); } else { const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb(); @@ -1021,7 +1026,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, TfGpuId tf_gpu_id(next_tf_gpu_id); ++next_tf_gpu_id; TF_RETURN_IF_ERROR( - GpuIdManager::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id)); + GpuIdManager::InsertTfPlatformGpuIdPair(tf_gpu_id, platform_gpu_id)); } } const int num_tf_gpus = next_tf_gpu_id; @@ -1046,7 +1051,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, return Status::OK(); } -static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id, +static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id, const se::DeviceDescription& desc) { int cc_major; int cc_minor; @@ -1055,9 +1060,8 @@ static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id, cc_minor = 0; } // LINT.IfChange - return strings::StrCat("device: ", cuda_gpu_id.value(), - ", name: ", desc.name(), - ", pci bus id: ", desc.pci_bus_id(), + return strings::StrCat("device: ", platform_gpu_id.value(), ", name: ", + desc.name(), ", pci bus id: ", desc.pci_bus_id(), ", compute capability: ", cc_major, ".", cc_minor); // 
LINT.ThenChange(//tensorflow/python/platform/test.py) } @@ -1072,12 +1076,13 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, const string device_name = strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value()); GpuIdUtil::CheckValidTfGpuId(tf_gpu_id); - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); int numa_node = dev_locality.numa_node(); se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); const se::DeviceDescription& desc = se->GetDeviceDescription(); GPUProcessState* process_state = GPUProcessState::singleton(); Allocator* gpu_allocator = process_state->GetGPUAllocator( @@ -1098,11 +1103,11 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, // TODO(laigd): report error if memory_limit doesn't match stats.bytes_limit. 
BaseGPUDevice* gpu_device = CreateGPUDevice( options, device_name, static_cast(stats.bytes_limit), dev_locality, - tf_gpu_id, GetShortDeviceDescription(cuda_gpu_id, desc), gpu_allocator, - ProcessState::singleton()->GetCPUAllocator(numa_node)); + tf_gpu_id, GetShortDeviceDescription(platform_gpu_id, desc), + gpu_allocator, ProcessState::singleton()->GetCPUAllocator(numa_node)); LOG(INFO) << "Created TensorFlow device (" << device_name << " with " << (stats.bytes_limit >> 20) << " MB memory) -> physical GPU (" - << GetShortDeviceDescription(cuda_gpu_id, desc) << ")"; + << GetShortDeviceDescription(platform_gpu_id, desc) << ")"; TF_RETURN_IF_ERROR(gpu_device->Init(options)); devices->push_back(gpu_device); @@ -1110,18 +1115,21 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, } namespace { -std::unique_ptr, bool>> +std::unique_ptr, bool>> GetPeerAccessMap(se::Platform* platform, - const std::vector& visible_gpu_order) { - std::unique_ptr, bool>> map( - new std::map, bool>); - for (CudaGpuId cuda_gpu_i : visible_gpu_order) { - for (CudaGpuId cuda_gpu_j : visible_gpu_order) { + const std::vector& visible_gpu_order) { + std::unique_ptr, bool>> map( + new std::map, bool>); + for (PlatformGpuId platform_gpu_i : visible_gpu_order) { + for (PlatformGpuId platform_gpu_j : visible_gpu_order) { se::StreamExecutor* from = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i) + .ValueOrDie(); se::StreamExecutor* to = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie(); - (*map)[{cuda_gpu_i, cuda_gpu_j}] = from->CanEnablePeerAccessTo(to); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j) + .ValueOrDie(); + (*map)[{platform_gpu_i, platform_gpu_j}] = + from->CanEnablePeerAccessTo(to); } } @@ -1131,19 +1139,19 @@ GetPeerAccessMap(se::Platform* platform, } // namespace Status BaseGPUDeviceFactory::GetInterconnectMaps( - const std::vector& 
visible_gpu_order, se::Platform* gpu_manager, - std::vector* maps) { + const std::vector& visible_gpu_order, + se::Platform* gpu_manager, std::vector* maps) { // The default interconnect map is obtained from the StreamExecutor. auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order); maps->resize(1); InterconnectMap& imap = maps->at(0); imap.name = "StreamExecutor"; imap.strength = InterconnectMap::kStreamExecutorStrength; - for (CudaGpuId cuda_id_i : visible_gpu_order) { - for (CudaGpuId cuda_id_j : visible_gpu_order) { - if (cuda_id_i == cuda_id_j) continue; - if ((*access_map)[{cuda_id_i, cuda_id_j}]) { - imap.directed_links.insert({cuda_id_i, cuda_id_j}); + for (PlatformGpuId gpu_id_i : visible_gpu_order) { + for (PlatformGpuId gpu_id_j : visible_gpu_order) { + if (gpu_id_i == gpu_id_j) continue; + if ((*access_map)[{gpu_id_i, gpu_id_j}]) { + imap.directed_links.insert({gpu_id_i, gpu_id_j}); } } } @@ -1158,13 +1166,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( all_tf_gpu_ids.push_back(TfGpuId(i)); } for (TfGpuId tf_gpu_id : all_tf_gpu_ids) { - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); // Get GPU bus_id from its reported NUMA affinity. Because GPUs are // virtualized in some environments, we can't just use the GPU id. // NUMA locales are indexed from 0, buses are indexed from 1. se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); const se::DeviceDescription& desc = se->GetDeviceDescription(); int numa_node = desc.numa_node(); if (numa_node < 0) { @@ -1174,7 +1183,8 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( // may run into trouble later with data transfer operations. The // trouble may manifest as slower than expected performance, or // outright failures. 
- LOG(INFO) << "Could not identify NUMA node of CUDA gpu id " << cuda_gpu_id + LOG(INFO) << "Could not identify NUMA node of platform GPU id " + << platform_gpu_id << ", defaulting to 0. Your kernel may not have been built " << "with NUMA support."; numa_node = 0; @@ -1187,10 +1197,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( LocalLinks* links = dev_locality.mutable_links(); for (const InterconnectMap& imap : interconnects) { for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) { - CudaGpuId cuda_gpu_dst; + PlatformGpuId platform_gpu_dst; TF_RETURN_IF_ERROR( - GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst)); - if (imap.directed_links.find({cuda_gpu_id, cuda_gpu_dst}) != + GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst)); + if (imap.directed_links.find({platform_gpu_id, platform_gpu_dst}) != imap.directed_links.end()) { InterconnectLink* ilink = links->add_link(); ilink->set_device_id(tf_gpu_dst.value()); @@ -1204,10 +1214,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( // add high strength links to the others. 
for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) { if (tf_gpu_id == tf_gpu_dst) continue; - CudaGpuId cuda_gpu_dst; + PlatformGpuId platform_gpu_dst; TF_RETURN_IF_ERROR( - GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst)); - if (cuda_gpu_id == cuda_gpu_dst) { + GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst)); + if (platform_gpu_id == platform_gpu_dst) { InterconnectLink* ilink = links->add_link(); ilink->set_device_id(tf_gpu_dst.value()); ilink->set_type("SAME_DEVICE"); @@ -1216,9 +1226,9 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( } (*localities)[tf_gpu_id] = dev_locality; - VLOG(1) << "GPUDevice CudaGpuId " << cuda_gpu_id << " TfGpuId " << tf_gpu_id - << " on bus " << dev_locality.bus_id() << " numa: " << numa_node - << " pci: " << desc.pci_bus_id() + VLOG(1) << "GPUDevice PlatformGpuId " << platform_gpu_id << " TfGpuId " + << tf_gpu_id << " on bus " << dev_locality.bus_id() + << " numa: " << numa_node << " pci: " << desc.pci_bus_id() << " DeviceLocality: " << dev_locality.DebugString(); } return Status::OK(); @@ -1226,14 +1236,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( static int GetDefaultMinGPUMultiprocessorCount( se::Platform* gpu_manager, - const std::vector& visible_gpu_order) { + const std::vector& visible_gpu_order) { static const int kDefaultMinGPUMultiprocessorCount = 8; // Find the highest multi-processor count across all visible GPUs. 
int max_count = -1; for (int i = 0; i < visible_gpu_order.size(); ++i) { auto exec_status = - GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_order[i]); + GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_order[i]); if (!exec_status.ok()) { continue; } @@ -1252,7 +1262,7 @@ static int GetDefaultMinGPUMultiprocessorCount( static int GetMinGPUMultiprocessorCount( se::Platform* gpu_manager, - const std::vector& visible_gpu_order) { + const std::vector& visible_gpu_order) { const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT"); if (tf_min_gpu_core_count == nullptr || @@ -1330,18 +1340,20 @@ std::vector GetSupportedCudaComputeCapabilities() { } Status EnablePeerAccess(se::Platform* platform, - const std::vector& visible_gpu_order) { + const std::vector& visible_gpu_order) { int possible_peer_count = 0; int enabled_peer_count = 0; for (int i = 0; i < visible_gpu_order.size(); ++i) { - const CudaGpuId cuda_gpu_i = visible_gpu_order[i]; + const PlatformGpuId platform_gpu_i = visible_gpu_order[i]; for (int j = 0; j < visible_gpu_order.size(); ++j) { - const CudaGpuId cuda_gpu_j = visible_gpu_order[j]; + const PlatformGpuId platform_gpu_j = visible_gpu_order[j]; // We have already validated that ExecutorForDevice() calls return OK. 
se::StreamExecutor* from = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i) + .ValueOrDie(); se::StreamExecutor* to = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j) + .ValueOrDie(); if (from->CanEnablePeerAccessTo(to)) { ++possible_peer_count; @@ -1349,7 +1361,8 @@ Status EnablePeerAccess(se::Platform* platform, if (!status.ok()) { LOG(WARNING) << "Unable to enable peer access between device ordinals " - << cuda_gpu_i << " and " << cuda_gpu_j << ", status: " << status; + << platform_gpu_i << " and " << platform_gpu_j + << ", status: " << status; } else { ++enabled_peer_count; } @@ -1372,22 +1385,23 @@ Status EnablePeerAccess(se::Platform* platform, } // namespace Status BaseGPUDeviceFactory::GetValidDeviceIds( - const std::vector& visible_gpu_order, - std::vector* ids) { + const std::vector& visible_gpu_order, + std::vector* ids) { se::Platform* gpu_manager = GPUMachineManager(); bool new_gpu_found = false; for (int i = 0; i < visible_gpu_order.size(); ++i) { - const CudaGpuId cuda_gpu_id = visible_gpu_order[i]; + const PlatformGpuId visible_gpu_id = visible_gpu_order[i]; - // Only perform this once per visible cuda gpu id. - if (visible_gpu_initialized_[cuda_gpu_id.value()]) { + // Only perform this once per visible platform gpu id. + if (visible_gpu_initialized_[visible_gpu_id.value()]) { continue; } - visible_gpu_initialized_[cuda_gpu_id.value()] = true; + visible_gpu_initialized_[visible_gpu_id.value()] = true; new_gpu_found = true; - auto executor = GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, cuda_gpu_id); + auto executor = + GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id); if (!executor.ok()) { return executor.status(); } @@ -1435,9 +1449,9 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds( // Filter out devices that don't have the right capability or power. 
for (int i = 0; i < visible_gpu_order.size(); ++i) { - const CudaGpuId visible_gpu_id = visible_gpu_order[i]; + const PlatformGpuId visible_gpu_id = visible_gpu_order[i]; auto exec_status = - GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_id); + GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id); if (!exec_status.ok()) { LOG(INFO) << "Ignoring visible gpu device " << visible_gpu_id << " whose executor is in invalid state: " @@ -1486,7 +1500,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds( if (!ids->empty()) { std::vector raw_ids(ids->size()); std::transform(ids->begin(), ids->end(), raw_ids.begin(), - [](CudaGpuId id) -> int { return id.value(); }); + [](PlatformGpuId id) -> int { return id.value(); }); LOG(INFO) << "Adding visible gpu devices: " << str_util::Join(raw_ids, ", "); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 56d03d7a8c..684cc0c1de 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -89,12 +89,12 @@ class BaseGPUDevice : public LocalDevice { void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, DeviceContext* dc, Allocator* allocator) override; - // Returns the CUDA GPU id of this device within the native driver system; + // Returns the platform GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. 
int gpu_id() const { - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id)); - return cuda_gpu_id.value(); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id)); + return platform_gpu_id.value(); } // The executor that provides control for the device; e.g., for CUDA this @@ -168,14 +168,14 @@ class BaseGPUDeviceFactory : public DeviceFactory { int32 strength; static const int kSameDeviceStrength; static const int kStreamExecutorStrength; - std::set> directed_links; + std::set> directed_links; }; protected: // Populates *maps with interconnect maps for all local direct access // pathways between GPUs. virtual Status GetInterconnectMaps( - const std::vector& visible_gpu_order, + const std::vector& visible_gpu_order, se::Platform* gpu_manager, std::vector* maps); struct TfGpuIdHash { @@ -207,16 +207,16 @@ class BaseGPUDeviceFactory : public DeviceFactory { Allocator* gpu_allocator, Allocator* cpu_allocator) = 0; - // Returns into 'ids' the list of valid CUDA GPU ids, in the order that + // Returns into 'ids' the list of valid platform GPU ids, in the order that // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc, // based upon 'visible_gpu_order' which was generated by parsing // GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU // ids. - Status GetValidDeviceIds(const std::vector& visible_gpu_order, - std::vector* ids); + Status GetValidDeviceIds(const std::vector& visible_gpu_order, + std::vector* ids); - // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id - // has been initialized by the process. + // visible_gpu_initialized_[platform_gpu_id] is true if visible GPU + // platform_gpu_id has been initialized by the process. 
std::unordered_map visible_gpu_initialized_; }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc index daf59f0560..36294094e9 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc @@ -30,18 +30,21 @@ namespace tensorflow { namespace { const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0"; -int64 GetTotalGPUMemory(CudaGpuId gpu_id) { +int64 GetTotalGPUMemory(PlatformGpuId gpu_id) { se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id) + .ValueOrDie(); int64 total_memory, available_memory; CHECK(se->DeviceMemoryUsage(&available_memory, &total_memory)); return total_memory; } -Status GetComputeCapability(CudaGpuId gpu_id, int* cc_major, int* cc_minor) { +Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major, + int* cc_minor) { se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id) + .ValueOrDie(); if (!se->GetDeviceDescription().cuda_compute_capability(cc_major, cc_minor)) { *cc_major = 0; *cc_minor = 0; @@ -223,7 +226,7 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) { // error. TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) { int cc_major, cc_minor; - TF_ASSERT_OK(GetComputeCapability(CudaGpuId(0), &cc_major, &cc_minor)); + TF_ASSERT_OK(GetComputeCapability(PlatformGpuId(0), &cc_major, &cc_minor)); // Exit early while running on Pascal or later GPUs. if (cc_major >= 6) { return; @@ -244,10 +247,10 @@ TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) { // more memory than what is available on the device. 
TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) { static constexpr double kGpuMemoryFraction = 1.2; - static constexpr CudaGpuId kCudaGpuId(0); + static constexpr PlatformGpuId kPlatformGpuId(0); int cc_major, cc_minor; - TF_ASSERT_OK(GetComputeCapability(kCudaGpuId, &cc_major, &cc_minor)); + TF_ASSERT_OK(GetComputeCapability(kPlatformGpuId, &cc_major, &cc_minor)); // Exit early if running on pre-Pascal GPUs. if (cc_major < 6) { LOG(INFO) @@ -262,7 +265,7 @@ TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) { ASSERT_EQ(1, devices.size()); int64 memory_limit = devices[0]->attributes().memory_limit(); - ASSERT_EQ(memory_limit, static_cast(GetTotalGPUMemory(kCudaGpuId) * + ASSERT_EQ(memory_limit, static_cast(GetTotalGPUMemory(kPlatformGpuId) * kGpuMemoryFraction)); AllocatorAttributes allocator_attributes = AllocatorAttributes(); diff --git a/tensorflow/core/common_runtime/gpu/gpu_id.h b/tensorflow/core/common_runtime/gpu/gpu_id.h index 2a6caea296..f0d9022821 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id.h @@ -25,10 +25,10 @@ namespace tensorflow { // physical machine, it can be filtered by CUDA environment variable // CUDA_VISIBLE_DEVICES. Note that this id is not visible to Tensorflow, but // result after filtering by CUDA_VISIBLE_DEVICES is visible to TF and is -// called CUDA GPU id as below. See +// called platform GPU id as below. See // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars // for more details. -// - CUDA GPU id (also called *visible* GPU id in +// - *platform* GPU id (also called *visible* GPU id in // third_party/tensorflow/core/protobuf/config.proto): this is the id that is // visible to Tensorflow after filtering by CUDA_VISIBLE_DEVICES, and is // generated by the CUDA GPU driver. It starts from 0 and is used for CUDA API @@ -39,14 +39,14 @@ namespace tensorflow { // field of the device name "/device:GPU:", and is also the identifier of // a BaseGPUDevice. 
Note that the configuration allows us to create multiple // BaseGPUDevice per GPU hardware in order to use multi CUDA streams on the -// hardware, so the mapping between TF GPU id and CUDA GPU id is not a 1:1 +// hardware, so the mapping between TF GPU id and platform GPU id is not a 1:1 // mapping, see the example below. // // For example, assuming that in the machine we have GPU device with index 0, 1, // 2 and 3 (physical GPU id). Setting "CUDA_VISIBLE_DEVICES=1,2,3" will create -// the following mapping between CUDA GPU id and physical GPU id: +// the following mapping between platform GPU id and physical GPU id: // -// CUDA GPU id -> physical GPU id +// platform GPU id -> physical GPU id // 0 -> 1 // 1 -> 2 // 2 -> 3 @@ -56,32 +56,32 @@ namespace tensorflow { // // Assuming we configure the Session to create one BaseGPUDevice per GPU // hardware, then setting GPUOptions::visible_device_list to "2,0" will create -// the following mappting between TF GPU id and CUDA GPU id: +// the following mappting between TF GPU id and platform GPU id: // -// TF GPU id -> CUDA GPU ID +// TF GPU id -> platform GPU ID // 0 (i.e. /device:GPU:0) -> 2 // 1 (i.e. /device:GPU:1) -> 0 // -// Note that CUDA GPU id 1 is filtered out by GPUOptions::visible_device_list, -// so it won't be used by the TF process. +// Note that platform GPU id 1 is filtered out by +// GPUOptions::visible_device_list, so it won't be used by the TF process. // // On the other hand, if we configure it to create 2 BaseGPUDevice per GPU // hardware, then setting GPUOptions::visible_device_list to "2,0" will create -// the following mappting between TF GPU id and CUDA GPU id: +// the following mappting between TF GPU id and platform GPU id: // -// TF GPU id -> CUDA GPU ID +// TF GPU id -> platform GPU ID // 0 (i.e. /device:GPU:0) -> 2 // 1 (i.e. /device:GPU:1) -> 2 // 2 (i.e. /device:GPU:2) -> 0 // 3 (i.e. 
/device:GPU:3) -> 0 // -// We create strong-typed integer classes for both TF GPU id and CUDA GPU id to -// minimize programming errors and improve code readability. Except for the +// We create strong-typed integer classes for both TF GPU id and platform GPU id +// to minimize programming errors and improve code readability. Except for the // StreamExecutor interface (as we don't change its API), whenever we need a -// TF GPU id (or CUDA GPU id) we should use TfGpuId (or CudaGpuId) instead of a -// raw integer. +// TF GPU id (or platform GPU id) we should use TfGpuId (or PlatformGpuId) +// instead of a raw integer. TF_LIB_GTL_DEFINE_INT_TYPE(TfGpuId, int32); -TF_LIB_GTL_DEFINE_INT_TYPE(CudaGpuId, int32); +TF_LIB_GTL_DEFINE_INT_TYPE(PlatformGpuId, int32); } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index b5099dc8ef..2b40730119 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -26,26 +26,27 @@ limitations under the License. namespace tensorflow { namespace { -// Manages the map between TfGpuId and CUDA GPU id. -class TfToCudaGpuIdMap { +// Manages the map between TfGpuId and platform GPU id. 
+class TfToPlatformGpuIdMap { public: - static TfToCudaGpuIdMap* singleton() { - static auto* id_map = new TfToCudaGpuIdMap; + static TfToPlatformGpuIdMap* singleton() { + static auto* id_map = new TfToPlatformGpuIdMap; return id_map; } - Status Insert(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) LOCKS_EXCLUDED(mu_) { + Status Insert(TfGpuId tf_gpu_id, PlatformGpuId platform_gpu_id) + LOCKS_EXCLUDED(mu_) { std::pair result; { mutex_lock lock(mu_); - result = id_map_.insert({tf_gpu_id.value(), cuda_gpu_id.value()}); + result = id_map_.insert({tf_gpu_id.value(), platform_gpu_id.value()}); } - if (!result.second && cuda_gpu_id.value() != result.first->second) { + if (!result.second && platform_gpu_id.value() != result.first->second) { return errors::AlreadyExists( "TensorFlow device (GPU:", tf_gpu_id.value(), ") is being mapped to " "multiple CUDA devices (", - cuda_gpu_id.value(), " now, and ", result.first->second, + platform_gpu_id.value(), " now, and ", result.first->second, " previously), which is not supported. 
" "This may be the result of providing different GPU configurations " "(ConfigProto.gpu_options, for example different visible_device_list)" @@ -56,17 +57,17 @@ class TfToCudaGpuIdMap { return Status::OK(); } - bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const + bool Find(TfGpuId tf_gpu_id, PlatformGpuId* platform_gpu_id) const LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); auto result = id_map_.find(tf_gpu_id.value()); if (result == id_map_.end()) return false; - *cuda_gpu_id = result->second; + *platform_gpu_id = result->second; return true; } private: - TfToCudaGpuIdMap() = default; + TfToPlatformGpuIdMap() = default; void TestOnlyReset() LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); @@ -78,17 +79,18 @@ class TfToCudaGpuIdMap { IdMapType id_map_ GUARDED_BY(mu_); friend class ::tensorflow::GpuIdManager; - TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap); + TF_DISALLOW_COPY_AND_ASSIGN(TfToPlatformGpuIdMap); }; } // namespace -Status GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, - CudaGpuId cuda_gpu_id) { - return TfToCudaGpuIdMap::singleton()->Insert(tf_gpu_id, cuda_gpu_id); +Status GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id, + PlatformGpuId platform_gpu_id) { + return TfToPlatformGpuIdMap::singleton()->Insert(tf_gpu_id, platform_gpu_id); } -Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { - if (TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) { +Status GpuIdManager::TfToPlatformGpuId(TfGpuId tf_gpu_id, + PlatformGpuId* platform_gpu_id) { + if (TfToPlatformGpuIdMap::singleton()->Find(tf_gpu_id, platform_gpu_id)) { return Status::OK(); } return errors::NotFound("TensorFlow device GPU:", tf_gpu_id.value(), @@ -96,7 +98,7 @@ Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { } void GpuIdManager::TestOnlyReset() { - TfToCudaGpuIdMap::singleton()->TestOnlyReset(); + TfToPlatformGpuIdMap::singleton()->TestOnlyReset(); } } // namespace tensorflow diff --git 
a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h index 491d92ccdd..62df4310c4 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -21,15 +21,17 @@ limitations under the License. namespace tensorflow { -// Class that maintains a map from TfGpuId to CudaGpuId, and manages the +// Class that maintains a map from TfGpuId to PlatformGpuId, and manages the // translation between them. class GpuIdManager { public: - // Adds a mapping from tf_gpu_id to cuda_gpu_id. - static Status InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + // Adds a mapping from tf_gpu_id to platform_gpu_id. + static Status InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id, + PlatformGpuId platform_gpu_id); - // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found. - static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id); + // Gets the platform_gpu_id associated with tf_gpu_id. Returns OK if found. + static Status TfToPlatformGpuId(TfGpuId tf_gpu_id, + PlatformGpuId* platform_gpu_id); // Clears the map. Used in unit tests only. static void TestOnlyReset(); diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc index a663ec7051..8bf3c6a308 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc @@ -22,38 +22,38 @@ limitations under the License. 
namespace tensorflow { namespace { -CudaGpuId TfToCudaGpuId(TfGpuId tf) { - CudaGpuId cuda; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf, &cuda)); - return cuda; +PlatformGpuId TfToPlatformGpuId(TfGpuId tf) { + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf, &platform_gpu_id)); + return platform_gpu_id; } TEST(GpuIdManagerTest, Basics) { TfGpuId key_0(0); - CudaGpuId value_0(0); - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0)); - EXPECT_EQ(value_0, TfToCudaGpuId(key_0)); + PlatformGpuId value_0(0); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0)); + EXPECT_EQ(value_0, TfToPlatformGpuId(key_0)); // Multiple calls to map the same value is ok. - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0)); - EXPECT_EQ(value_0, TfToCudaGpuId(key_0)); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0)); + EXPECT_EQ(value_0, TfToPlatformGpuId(key_0)); // Map a different TfGpuId to a different value. TfGpuId key_1(3); - CudaGpuId value_1(2); - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_1, value_1)); - EXPECT_EQ(value_1, TfToCudaGpuId(key_1)); + PlatformGpuId value_1(2); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_1, value_1)); + EXPECT_EQ(value_1, TfToPlatformGpuId(key_1)); // Mapping a different TfGpuId to the same value is ok. TfGpuId key_2(10); - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_1)); - EXPECT_EQ(value_1, TfToCudaGpuId(key_2)); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_1)); + EXPECT_EQ(value_1, TfToPlatformGpuId(key_2)); // Mapping the same TfGpuId to a different value. - ASSERT_FALSE(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_0).ok()); + ASSERT_FALSE(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_0).ok()); // Getting a nonexistent mapping. 
- ASSERT_FALSE(GpuIdManager::TfToCudaGpuId(TfGpuId(100), &value_0).ok()); + ASSERT_FALSE(GpuIdManager::TfToPlatformGpuId(TfGpuId(100), &value_0).ok()); } } // namespace diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h index b9c66b3328..b1f10fb1dc 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h @@ -24,34 +24,37 @@ limitations under the License. namespace tensorflow { -// Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids. +// Utility methods for translation between Tensorflow GPU ids and platform GPU +// ids. class GpuIdUtil { public: // Convenient methods for getting the associated executor given a TfGpuId or - // CudaGpuId. - static se::port::StatusOr ExecutorForCudaGpuId( - se::Platform* gpu_manager, CudaGpuId cuda_gpu_id) { - return gpu_manager->ExecutorForDevice(cuda_gpu_id.value()); + // PlatformGpuId. + static se::port::StatusOr ExecutorForPlatformGpuId( + se::Platform* gpu_manager, PlatformGpuId platform_gpu_id) { + return gpu_manager->ExecutorForDevice(platform_gpu_id.value()); } - static se::port::StatusOr ExecutorForCudaGpuId( - CudaGpuId cuda_gpu_id) { - return ExecutorForCudaGpuId(GPUMachineManager(), cuda_gpu_id); + static se::port::StatusOr ExecutorForPlatformGpuId( + PlatformGpuId platform_gpu_id) { + return ExecutorForPlatformGpuId(GPUMachineManager(), platform_gpu_id); } static se::port::StatusOr ExecutorForTfGpuId( TfGpuId tf_gpu_id) { - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - return ExecutorForCudaGpuId(cuda_gpu_id); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); + return ExecutorForPlatformGpuId(platform_gpu_id); } - // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate. 
+ // Verify that the platform_gpu_id associated with a TfGpuId is legitimate. static void CheckValidTfGpuId(TfGpuId tf_gpu_id) { - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); const int visible_device_count = GPUMachineManager()->VisibleDeviceCount(); - CHECK_LT(cuda_gpu_id.value(), visible_device_count) - << "cuda_gpu_id is outside discovered device range." - << " TF GPU id: " << tf_gpu_id << " CUDA GPU id: " << cuda_gpu_id + CHECK_LT(platform_gpu_id.value(), visible_device_count) + << "platform_gpu_id is outside discovered device range." + << " TF GPU id: " << tf_gpu_id + << " platform GPU id: " << platform_gpu_id << " visible device count: " << visible_device_count; } }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index b18688174d..a5b46382f1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -106,22 +106,23 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, return nullptr; } - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); gpu_allocator = - new GPUBFCAllocator(cuda_gpu_id, total_bytes, options, + new GPUBFCAllocator(platform_gpu_id, total_bytes, options, strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc")); // If true, checks for memory overwrites by writing // distinctive patterns on both ends of allocated memory. 
if (useCudaMemoryGuardAllocator()) { - gpu_allocator = new GPUDebugAllocator(gpu_allocator, cuda_gpu_id); - gpu_allocator = new GPUNanResetAllocator(gpu_allocator, cuda_gpu_id); + gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_gpu_id); + gpu_allocator = new GPUNanResetAllocator(gpu_allocator, platform_gpu_id); } else if (useCudaMallocAllocator()) { // If true, passes all allocation requests through to cudaMalloc // useful for doing memory debugging with tools like cuda-memcheck // **WARNING** probably will not work in a multi-gpu scenario - gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id); + gpu_allocator = + new GPUcudaMallocAllocator(gpu_allocator, platform_gpu_id); } gpu_allocators_[tf_gpu_id.value()] = gpu_allocator; @@ -138,7 +139,7 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::GPU; - md.dev_index = cuda_gpu_id.value(); + md.dev_index = platform_gpu_id.value(); md.gpu_registered = false; md.nic_registered = true; if (static_cast(gpu_al_.size()) <= tf_gpu_id.value()) { diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index b97603c890..e4f6bf7c86 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -93,13 +93,13 @@ Status SingleMachine::Provision() { strings::StrCat("Not able to parse GPU device name: ", dev.name())); } TfGpuId tf_gpu_id(parsed.id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (!s.ok()) { return errors::Unavailable("Unknown TF GPU device with id ", tf_gpu_id.value(), ": ", s.ToString()); } - attr = GetLocalGPUInfo(cuda_gpu_id); + attr = 
GetLocalGPUInfo(platform_gpu_id); } else if (dev.device_type().find("XLA") == string::npos) { // Filter out the fake XLA devices to avoid double counting the actual // hardware resources that are available. diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index a7519725a5..567e7c075e 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -70,13 +70,14 @@ DeviceProperties GetLocalCPUInfo() { return device; } -DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) { +DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) { DeviceProperties device; device.set_type("GPU"); #if GOOGLE_CUDA cudaDeviceProp properties; - cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value()); + cudaError_t error = + cudaGetDeviceProperties(&properties, platform_gpu_id.value()); if (error != cudaSuccess) { device.set_type("UNKNOWN"); LOG(ERROR) << "Failed to get device properties, error code: " << error; @@ -122,15 +123,15 @@ DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { } else if (device.type == "GPU") { if (device.has_id) { TfGpuId tf_gpu_id(device.id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (!s.ok()) { LOG(ERROR) << s; return unknown; } - return GetLocalGPUInfo(cuda_gpu_id); + return GetLocalGPUInfo(platform_gpu_id); } else { - return GetLocalGPUInfo(CudaGpuId(0)); + return GetLocalGPUInfo(PlatformGpuId(0)); } } return unknown; diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h index ca15c48006..f0a342b728 100644 --- a/tensorflow/core/grappler/clusters/utils.h +++ b/tensorflow/core/grappler/clusters/utils.h @@ -28,7 +28,7 @@ DeviceProperties GetLocalCPUInfo(); // Returns the DeviceProperties for the specified GPU 
attached to the server on // which grappler is running. -DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id); +DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id); // Returns the DeviceProperties of the specified device DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device); diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc index 74218adbac..3863d62980 100644 --- a/tensorflow/core/grappler/clusters/utils_test.cc +++ b/tensorflow/core/grappler/clusters/utils_test.cc @@ -31,22 +31,22 @@ TEST(UtilsTest, GetLocalGPUInfo) { LOG(INFO) << "CUDA is enabled."; DeviceProperties properties; - // Invalid CUDA GPU ID. - properties = GetLocalGPUInfo(CudaGpuId(100)); + // Invalid platform GPU ID. + properties = GetLocalGPUInfo(PlatformGpuId(100)); EXPECT_EQ("UNKNOWN", properties.type()); - // Succeed when a valid CUDA GPU id was inserted. - properties = GetLocalGPUInfo(CudaGpuId(0)); + // Succeed when a valid platform GPU id was inserted. + properties = GetLocalGPUInfo(PlatformGpuId(0)); EXPECT_EQ("GPU", properties.type()); EXPECT_EQ("NVIDIA", properties.vendor()); #else LOG(INFO) << "CUDA is not enabled."; DeviceProperties properties; - properties = GetLocalGPUInfo(CudaGpuId(0)); + properties = GetLocalGPUInfo(PlatformGpuId(0)); EXPECT_EQ("GPU", properties.type()); - properties = GetLocalGPUInfo(CudaGpuId(100)); + properties = GetLocalGPUInfo(PlatformGpuId(100)); EXPECT_EQ("GPU", properties.type()); #endif } @@ -74,20 +74,20 @@ TEST(UtilsTest, GetDeviceInfo) { EXPECT_EQ("NVIDIA", properties.vendor()); #endif - // TF to CUDA GPU id mapping entry doesn't exist. + // TF to platform GPU id mapping entry doesn't exist. device.has_id = true; device.id = 0; properties = GetDeviceInfo(device); EXPECT_EQ("UNKNOWN", properties.type()); #if GOOGLE_CUDA - // Invalid CUDA GPU id. - GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100)); + // Invalid platform GPU id. 
+ GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100)); properties = GetDeviceInfo(device); EXPECT_EQ("UNKNOWN", properties.type()); - // Valid CUDA GPU id. - GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0)); + // Valid platform GPU id. + GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(1), PlatformGpuId(0)); device.id = 1; properties = GetDeviceInfo(device); EXPECT_EQ("GPU", properties.type()); diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index aad00ce039..7691f25327 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -209,13 +209,13 @@ DeviceProperties GetDeviceInfo(const string& device_str) { if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { if (parsed.type == "GPU") { TfGpuId tf_gpu_id(parsed.id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (!s.ok()) { // We are probably running simulation without linking cuda libraries. - cuda_gpu_id = CudaGpuId(parsed.id); + platform_gpu_id = PlatformGpuId(parsed.id); } - return GetLocalGPUInfo(cuda_gpu_id); + return GetLocalGPUInfo(platform_gpu_id); } else if (parsed.type == "CPU") { return GetLocalCPUInfo(); } diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index da3a99565e..c68504a272 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -68,7 +68,7 @@ message GPUOptions { // after the process starts. Users are required to use vendor // specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the // physical to visible device mapping prior to invoking TensorFlow. - // 2. In the code, the ids in this list are also called "CUDA GPU id"s, + // 2. 
In the code, the ids in this list are also called "platform GPU id"s, // and the 'virtual' ids of GPU devices (i.e. the ids in the device // name "/device:GPU:") are also called "TF GPU id"s. Please // refer to third_party/tensorflow/core/common_runtime/gpu/gpu_id.h -- GitLab From 204ef67242ce7fbba067b631c4d6c4bcd64288c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 1 Sep 2018 21:06:52 +0800 Subject: [PATCH 0069/1357] CLN: remove print method, and append error msg to exception --- tensorflow/python/framework/test_util.py | 30 ++++++++++--------- tensorflow/python/framework/test_util_test.py | 8 +++++ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index b5388ad0b2..6d03e956da 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -1329,35 +1329,36 @@ class TensorFlowTestCase(googletest.TestCase): self.assertEqual( a.shape, b.shape, "Shape mismatch: expected %s, got %s." % (a.shape, b.shape)) + msgs = [msg] if not np.allclose(a, b, rtol=rtol, atol=atol): - # Prints more details than np.testing.assert_allclose. + # Add more details than np.testing.assert_allclose. # # NOTE: numpy.allclose (and numpy.testing.assert_allclose) # checks whether two arrays are element-wise equal within a # tolerance. The relative difference (rtol * abs(b)) and the # absolute difference atol are added together to compare against # the absolute difference between a and b. Here, we want to - # print out which elements violate such conditions. + # tell user which elements violate such conditions. 
cond = np.logical_or( np.abs(a - b) > atol + rtol * np.abs(b), np.isnan(a) != np.isnan(b)) if a.ndim: x = a[np.where(cond)] y = b[np.where(cond)] - print("not close where = ", np.where(cond)) + msgs.append("not close where = {}".format(np.where(cond))) else: # np.where is broken for scalars x, y = a, b - print("not close lhs = ", x) - print("not close rhs = ", y) - print("not close dif = ", np.abs(x - y)) - print("not close tol = ", atol + rtol * np.abs(y)) - print("dtype = %s, shape = %s" % (a.dtype, a.shape)) + msgs.append("not close lhs = {}".format(x)) + msgs.append("not close rhs = {}".format(y)) + msgs.append("not close dif = {}".format(np.abs(x - y))) + msgs.append("not close tol = {}".format(atol + rtol * np.abs(y))) + msgs.append("dtype = {}, shape = {}".format(a.dtype, a.shape)) # TODO(xpan): There seems to be a bug: # tensorflow/compiler/tests:binary_ops_test pass with float32 # nan even though the equal_nan is False by default internally. np.testing.assert_allclose( - a, b, rtol=rtol, atol=atol, err_msg=msg, equal_nan=True) + a, b, rtol=rtol, atol=atol, err_msg="\n".join(msgs), equal_nan=True) def _assertAllCloseRecursive(self, a, @@ -1539,19 +1540,20 @@ class TensorFlowTestCase(googletest.TestCase): np.float16, np.float32, np.float64, dtypes.bfloat16.as_numpy_dtype ]): same = np.logical_or(same, np.logical_and(np.isnan(a), np.isnan(b))) + msgs = [msg] if not np.all(same): - # Prints more details than np.testing.assert_array_equal. + # Add more details than np.testing.assert_array_equal. 
diff = np.logical_not(same) if a.ndim: x = a[np.where(diff)] y = b[np.where(diff)] - print("not equal where = ", np.where(diff)) + msgs.append("not equal where = {}".format(np.where(diff))) else: # np.where is broken for scalars x, y = a, b - print("not equal lhs = ", x) - print("not equal rhs = ", y) - np.testing.assert_array_equal(a, b, err_msg=msg) + msgs.append("not equal lhs = {}".format(x)) + msgs.append("not equal rhs = {}".format(y)) + np.testing.assert_array_equal(a, b, err_msg="\n".join(msgs)) def assertAllGreater(self, a, comparison_target): """Assert element values are all greater than a target value. diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index a0939f98b2..c9b5d46f98 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -270,6 +270,11 @@ class TestUtilTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(AssertionError, r"Not equal to tolerance"): self.assertAllClose(7, 7 + 1e-5) + @test_util.run_in_graph_and_eager_modes + def testAllCloseList(self): + with self.assertRaisesRegexp(AssertionError, r"not close dif"): + self.assertAllClose([0], [1]) + @test_util.run_in_graph_and_eager_modes def testAllCloseDictToNonDict(self): with self.assertRaisesRegexp(ValueError, r"Can't compare dict to non-dict"): @@ -455,6 +460,9 @@ class TestUtilTest(test_util.TensorFlowTestCase): self.assertAllEqual([120] * 3, k) self.assertAllEqual([20] * 3, j) + with self.assertRaisesRegexp(AssertionError, r"not equal lhs"): + self.assertAllEqual([0] * 3, k) + @test_util.run_in_graph_and_eager_modes def testAssertNotAllClose(self): # Test with arrays -- GitLab From 2586eb3bfeeef3af357e438ae5aff92d2bac12a5 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Mon, 3 Sep 2018 11:48:35 +0800 Subject: [PATCH 0070/1357] Code fix against ci_build error results. 
--- tensorflow/cc/gradients/nn_grad_test.cc | 3 +- tensorflow/core/kernels/relu_op.cc | 8 +-- tensorflow/core/kernels/relu_op.h | 8 +-- tensorflow/core/kernels/relu_op_functor.h | 1 - .../python/kernel_tests/relu_op_test.py | 50 +++++++++---------- .../tools/api/golden/v1/tensorflow.pbtxt | 4 ++ 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index bf0db1f59d..d8c2a1a0fc 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -180,8 +180,7 @@ TEST_F(NNGradTest, LeakyReluGradGrad) { // Avoid input values where Leaky ReLU gradient is not well defined (around // zero). Tensor x_init_value = test::AsTensor( - {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, - {5, 2}); + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2}); Tensor features = test::AsTensor( {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, {5, 2}); diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index c4f2ef5632..cafa49cbb6 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -106,15 +106,15 @@ namespace functor { \ template <> \ void LeakyRelu::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor features, \ - T alpha, typename TTypes::Tensor activations); \ + const GPUDevice& d, typename TTypes::ConstTensor features, T alpha, \ + typename TTypes::Tensor activations); \ extern template struct LeakyRelu; \ \ template <> \ void LeakyReluGrad::operator()( \ const GPUDevice& d, typename TTypes::ConstTensor gradients, \ - typename TTypes::ConstTensor features, \ - T alpha, typename TTypes::Tensor backprops); \ + typename TTypes::ConstTensor features, T alpha, \ + typename TTypes::Tensor backprops); \ extern template struct LeakyReluGrad; \ \ template <> \ diff --git a/tensorflow/core/kernels/relu_op.h 
b/tensorflow/core/kernels/relu_op.h index c55190065c..fa79ab03ae 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -143,8 +143,8 @@ class LeakyReluOp : public UnaryElementWiseOp> { void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { functor::LeakyRelu functor; - functor(context->eigen_device(), input.flat(), - alpha_, output->flat()); + functor(context->eigen_device(), input.flat(), alpha_, + output->flat()); } private: @@ -183,7 +183,9 @@ class LeakyReluGradOp template void LeakyReluGradOp::OperateNoTemplate(OpKernelContext* context, - const Tensor& g, const Tensor& a, T alpha, Tensor* output) { + const Tensor& g, + const Tensor& a, T alpha, + Tensor* output) { if (!ReluHelpers::ValidateSameSize(context, g, a)) return; functor::LeakyReluGrad functor; functor(context->eigen_device(), g.flat(), a.flat(), alpha, diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 7f0951451d..548d5a277d 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -91,7 +91,6 @@ struct Relu6Grad { } }; - // Functor used by LeakyReluOp to do the computations. 
template struct LeakyRelu { diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 7066f28883..3e24b8a2c4 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -323,37 +323,37 @@ class LeakyReluTest(test.TestCase): def testGradGradFloat32(self): with compat.forward_compatibility_horizon(2018, 10, 2): with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - name="x") - y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float32, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) print("leaky_relu (float32) gradient of gradient err = ", err) self.assertLess(err, 1e-4) def testGradGradFloat64(self): with compat.forward_compatibility_horizon(2018, 10, 2): with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - dtype=dtypes.float64, - name="x") - y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float64, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) + x = constant_op.constant( + [-0.9, -0.7, 
-0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) print("leaky_relu (float64) gradient of gradient err = ", err) self.assertLess(err, 1e-10) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 4de662fe33..9e8d320f06 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1324,6 +1324,10 @@ tf_module { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "leaky_relu" + argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], " + } member_method { name: "less" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Mon, 3 Sep 2018 12:10:51 +0800 Subject: [PATCH 0071/1357] Add XLA support for LeakyReluOp. 
Code contributed by: Meng Chen --- tensorflow/compiler/tests/binary_ops_test.py | 7 ++++ tensorflow/compiler/tests/unary_ops_test.py | 5 +++ tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 0aafda7fb4..8941dd4e27 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -178,6 +178,13 @@ class BinaryOpsTest(xla_test.XLATestCase): [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) + self._testBinary( + gen_nn_ops._leaky_relu_grad, + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), + np.array( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), + expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype)) + self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 73adb0d243..91f876fa23 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -361,6 +361,11 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[-0.05, 6.05, 5]], dtype=dtype), expected=np.array([[0, 6, 5]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + nn_ops.leaky_relu, + np.array([[-1.0, 1.0]], dtype=dtype), + expected=np.array([[-0.2, 1.0]], dtype=dtype)) + self._assertOpOutputMatchesExpected( nn_ops.softmax, np.array([1, 2, 3, 4], dtype=dtype), diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index d35777ccb1..ec14735884 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,6 +50,24 @@ class Relu6Op : public 
XlaOpKernel { } }; + +class LeakyReluOp : public XlaOpKernel { + public: + explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); + } + // Compute the max of the input x and alpha*x. + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), + static_cast(alpha_)); + ctx->SetOutput(0, + xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); + } + private: + float alpha_; +}; + class ReluGradOp : public XlaOpKernel { public: explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} @@ -84,10 +102,34 @@ class Relu6GradOp : public XlaOpKernel { } }; +class LeakyReluGradOp : public XlaOpKernel { + public: + explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); + } + // Return the lhs (incoming gradient) if the rhs (input feature) > 0, + // otherwise return the alpha * lhs. 
+ void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* b = ctx->builder(); + const TensorShape shape = ctx->InputShape(0); + const auto zero = + xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); + const auto pred = xla::Gt(ctx->Input(1), zero); + auto alpha = XlaHelpers::FloatLiteral(b, input_type(0), + static_cast(alpha_)); + ctx->SetOutput(0, + xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); + } + private: + float alpha_; +}; + REGISTER_XLA_OP(Name("Relu"), ReluOp); REGISTER_XLA_OP(Name("Relu6"), Relu6Op); +REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp); REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp); REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp); +REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp); } // namespace } // namespace tensorflow -- GitLab From 6712df7f3c73bfabab51e7c7eed2130d7bcff6ec Mon Sep 17 00:00:00 2001 From: Pan Daoxin Date: Tue, 4 Sep 2018 16:18:40 +0800 Subject: [PATCH 0072/1357] Add MklSlice op. --- tensorflow/core/BUILD | 2 + tensorflow/core/graph/mkl_layout_pass.cc | 19 + tensorflow/core/graph/mkl_layout_pass_test.cc | 18 + tensorflow/core/kernels/BUILD | 9 + tensorflow/core/kernels/mkl_slice_op.cc | 356 ++++++++++++++++++ tensorflow/core/ops/array_ops.cc | 54 +++ 6 files changed, 458 insertions(+) create mode 100644 tensorflow/core/kernels/mkl_slice_op.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5c314f359c..47f16ac747 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1337,6 +1337,7 @@ cc_library( "//tensorflow/core/kernels:mkl_pooling_ops", "//tensorflow/core/kernels:mkl_relu_op", "//tensorflow/core/kernels:mkl_reshape_op", + "//tensorflow/core/kernels:mkl_slice_op", "//tensorflow/core/kernels:mkl_softmax_op", "//tensorflow/core/kernels:mkl_tfconv_op", "//tensorflow/core/kernels:mkl_aggregate_ops", @@ -3758,6 +3759,7 @@ tf_cc_test_mkl( "//tensorflow/core/kernels:mkl_pooling_ops", "//tensorflow/core/kernels:mkl_relu_op", 
"//tensorflow/core/kernels:mkl_reshape_op", + "//tensorflow/core/kernels:mkl_slice_op", "//tensorflow/core/kernels:mkl_softmax_op", "//tensorflow/core/kernels:mkl_tfconv_op", ]), diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 2e644fe987..50fd6bae12 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2447,6 +2447,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.tanh = "Tanh"; csinfo_.tanh_grad = "TanhGrad"; csinfo_.reshape = "Reshape"; + csinfo_.slice = "Slice"; csinfo_.softmax = "Softmax"; csinfo_.split = "Split"; // Element-wise ops. Ensure you also add any new ops to IsOpElementWise @@ -2554,6 +2555,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.reshape, mkl_op_registry::GetMklOpName(csinfo_.reshape), CopyAttrsReshape, AlwaysRewrite}); + rinfo_.push_back({csinfo_.slice, + mkl_op_registry::GetMklOpName(csinfo_.slice), + CopyAttrsSlice, AlwaysRewrite}); rinfo_.push_back({csinfo_.softmax, mkl_op_registry::GetMklOpName(csinfo_.softmax), CopyAttrsDataType, AlwaysRewrite}); @@ -2673,6 +2677,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string tanh; string tanh_grad; string reshape; + string slice; string softmax; string split; string squared_difference; @@ -3131,6 +3136,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb); + static void CopyAttrsSlice(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb); // Generate a graph node in graph 'g' representing a dummy Mkl tensor node, @@ -3734,6 +3740,19 @@ void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node, nb->Attr("Tshape", Tshape); } +void 
MklLayoutRewritePass::CopyAttrsSlice(const Node* orig_node, + NodeBuilder* nb) { + DataType T; + DataType Index; + + // Get all attributes from old node. + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Index", &Index)); + // Add attributes to new node. + nb->Attr("T", T); + nb->Attr("Index", Index); +} + void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb) { DataType T; diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index e8bac847e5..cccef5a03a 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -3510,6 +3510,24 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) { "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1"); } +TEST_F(MklLayoutPassTest, NodeRewrite_Slice_DeviceTest) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Int32Input'}" + "node { name: 'D' op: 'Slice'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Index' value { type: DT_INT32 } }" + " input: ['A', 'B', 'C'] }" + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['A', 'D'] }"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);C(Int32Input);" + "D(_MklSlice);DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A->E;" + "A:control->DMT/_0:control;A:control->DMT/_1:control;A:control->DMT/_2:control;" + "B->D:1;C->D:2;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); +} + ///////////////////////////////////////////////////////////////////// // Post-rewrite fixup pass test diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 25063ac823..2582814d08 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6328,6 +6328,15 @@ tf_mkl_kernel_library( deps = ARRAY_DEPS + mkl_deps(), ) +tf_mkl_kernel_library( + name 
= "mkl_slice_op", + prefix = "mkl_slice_op", + deps = ARRAY_DEPS + if_mkl([ + "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", + ]), +) + tf_mkl_kernel_library( name = "mkl_identity_op", prefix = "mkl_identity_op", diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc new file mode 100644 index 0000000000..86fb572478 --- /dev/null +++ b/tensorflow/core/kernels/mkl_slice_op.cc @@ -0,0 +1,356 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+ +#ifdef INTEL_MKL +#ifndef INTEL_MKL_ML_ONLY + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/prefetch.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#include "mkldnn.hpp" +#include "tensorflow/core/util/mkl_util.h" + +using mkldnn::stream; +using mkldnn::view; + +namespace tensorflow { + +namespace { + +gtl::InlinedVector IntTensorToInt64Vec(const Tensor& tensor) { + gtl::InlinedVector out; + if (tensor.dtype() == DT_INT32) { + for (int64 i = 0; i < tensor.NumElements(); ++i) { + out.push_back(tensor.flat()(i)); + } + } else if (tensor.dtype() == DT_INT64) { + for (int64 i = 0; i < tensor.NumElements(); ++i) { + out.push_back(tensor.flat()(i)); + } + } else { + LOG(FATAL) << "begin must be either int32 or int64"; + } + return out; +} + +} // namespace + +typedef Eigen::ThreadPoolDevice CPUDevice; + +// A version of SharedValidation (slice_op.h) written for input that is in +// either Mkl layout or Tensorflow layout. +static void ValidateMklInputs(OpKernelContext* context, bool* is_identity, + gtl::InlinedVector* begin, + gtl::InlinedVector* size) { + const int kInputTensorIndex = 0; + const int kInputBeginIndex = 1; + const int kInputSizeIndex = 2; + const Tensor& input = MklGetInput(context, kInputTensorIndex); + const Tensor& begin_tensor = MklGetInput(context, kInputBeginIndex); + const Tensor& size_tensor = MklGetInput(context, kInputSizeIndex); + + MklDnnShape input_mkl_shape, begin_mkl_shape, size_mkl_shape; + GetMklShape(context, kInputTensorIndex, &input_mkl_shape); + GetMklShape(context, kInputBeginIndex, &begin_mkl_shape); + GetMklShape(context, kInputSizeIndex, &size_mkl_shape); + + // Begin and size tensors cannot be in MklDnn layout. 
+ CHECK_EQ(begin_mkl_shape.IsMklTensor(), false); + CHECK_EQ(size_mkl_shape.IsMklTensor(), false); + + TensorShape input_tf_shape = input_mkl_shape.IsMklTensor() + ? input_mkl_shape.GetTfShape() + : input.shape(); + + OP_REQUIRES( + context, context->op_kernel().IsLegacyVector(begin_tensor.shape()) && + context->op_kernel().IsLegacyVector(size_tensor.shape()) && + begin_tensor.NumElements() == input_tf_shape.dims() && + size_tensor.NumElements() == input_tf_shape.dims(), + errors::InvalidArgument( + "Expected begin and size arguments to be 1-D tensors of size ", + input_tf_shape.dims(), ", but got shapes ", + begin_tensor.shape().DebugString(), " and ", + size_tensor.shape().DebugString(), " instead.")); + + const int input_dims = input_tf_shape.dims(); + *begin = IntTensorToInt64Vec(begin_tensor); + *size = IntTensorToInt64Vec(size_tensor); + for (int i = 0; i < input_dims; ++i) { + if ((*size)[i] == -1) { + // A size[i] of -1 means "all elements from begin[i] to dim_size(i)". + (*size)[i] = input_tf_shape.dim_size(i) - (*begin)[i]; + } + } + + *is_identity = true; + for (int i = 0; i < input_dims; ++i) { + int64 b = (*begin)[i]; + int64 s = (*size)[i]; + if (input_tf_shape.dim_size(i) == 0) { + OP_REQUIRES( + context, b == 0 && s == 0, + errors::InvalidArgument("Expected begin[", i, "] == 0 (got ", b, + ") and size[", i, "] == 0 ", "(got ", s, + ") when ", "input.dim_size(", i, ") == 0")); + } else { + OP_REQUIRES(context, 0 <= b && b <= input_tf_shape.dim_size(i), + errors::InvalidArgument("Expected begin[", i, "] in [0, ", + input_tf_shape.dim_size(i), + "], but got ", b)); + OP_REQUIRES(context, 0 <= s && b + s <= input_tf_shape.dim_size(i), + errors::InvalidArgument("Expected size[", i, "] in [0, ", + input_tf_shape.dim_size(i) - b, + "], but ", "got ", s)); + } + const bool take_all = (b == 0) && (s == input_tf_shape.dim_size(i)); + (*is_identity) &= take_all; + } +} + +// A version of SharedSliceCommonCases function written for input tensor +// that may be 
in MklDnn layout or in Tensorflow layout. +template +static void CheckCommonCasesForMklInputs(OpKernelContext* context, + gtl::InlinedVector* begin, + gtl::InlinedVector* size, + bool* done) { + bool is_identity = true; + *done = false; + + ValidateMklInputs(context, &is_identity, begin, size); + if (!context->status().ok()) return; + + const Tensor& input = MklGetInput(context, 0); + MklDnnShape input_mkl_shape; + GetMklShape(context, 0, &input_mkl_shape); + + if (is_identity) { + VLOG(1) << "Slice identity"; + context->set_output(0, input); + // Mkl metadata tensor in this case can just be forwarded from input to + // output. + AllocateOutputSetMklShape(context, 0, input_mkl_shape); + *done = true; + return; + } +} + +// MKL-DNN implementation of Slice +template +class MklDnnSliceOp : public OpKernel { + public: + explicit MklDnnSliceOp(OpKernelConstruction* context) : OpKernel(context) {} + + ~MklDnnSliceOp() {} + + void Compute(OpKernelContext* context) override { + gtl::InlinedVector begin; + gtl::InlinedVector size; + bool done = false; + + CheckCommonCasesForMklInputs(context, &begin, &size, &done); + if (!context->status().ok() || done == true) return; + + // MKL-DNN does not have this limitation of supporting less than 8 dimension + // tensor. But we are mimicking functionality of Eigen Slice op for CPU. + if (begin.size() >= 8) { + OP_REQUIRES( + context, false, + errors::Unimplemented("MklDnnSliceOp : Unhandled input dimensions")); + } + + ComputeMklDnnSlice(context, begin, size); + return; + } + + private: + // Slice op implemented using MKL-DNN APIs. + void ComputeMklDnnSlice(OpKernelContext* context, + const gtl::InlinedVector& begin, + const gtl::InlinedVector& size) { + try { + // MKL-DNN API usage below is guided by description at: + // https://github.com/01org/mkl-dnn/issues/69 + // + // Relevant part of the description is copied below: + // + // Let's say you want to copy a part of memory into another buffer (and + // probably change the format). 
Then your steps are: + // + // 1. create memory primitive descriptor in_mem_pd and memory primitive + // in_mem_p for the entire source data. + // 2. create view primitive descriptor in_submem_pd based on in_mem_pd, + // initial offsets, and sub-sizes + // 3. create memory primitive descriptor out_mem_pd and memory primitive + // out_mem_p for the output (the logical sizes should much sub-sizes + // used in step 2, but the format might be arbitrary) + // 4. create reorder primitive descriptor reorder_pd based on in_submem_pd + // and out_mem_pd + // 5. create reorder primitive itself based on reorder_pd, in_mem_p, and + // out_mem_p. + // + // Please notice that there is no view primitive. There is only view + // primitive descriptor. And the reorder uses source memory as input but + // traverses it according to a view in_submem_pd. + + auto cpu_engine = engine(engine::cpu, 0); + MklDnnData src(&cpu_engine); + MklDnnData output(&cpu_engine); + + // Populate offsets and sizes in memory::dims format based on vector. + memory::dims begin_dims = {}; + begin_dims.resize(begin.size()); + for (size_t i = 0; i < begin.size(); ++i) begin_dims[i] = begin[i]; + memory::dims size_dims = {}; + bool empty = false; + size_dims.resize(size.size()); + for (size_t i = 0; i < size.size(); ++i) { + size_dims[i] = size[i]; + if (size_dims[i] == 0) empty = true; + } + + Tensor* output_tensor = nullptr; + MklDnnShape output_mkl_shape; + if (empty) { // for empty dims + auto shape_to = MklDnnDimsToTFShape(size_dims); + AllocateOutputSetMklShape(context, 0, &output_tensor, shape_to, + output_mkl_shape); + return; + } + + // Step 1 (as per above description) - Create memory for user data. + // We use blocked format here to describe input tensor. 
+ const Tensor& input_tensor = MklGetInput(context, 0); + MklDnnShape input_mkl_shape; + GetMklShape(context, 0, &input_mkl_shape); + + if (input_mkl_shape.IsMklTensor()) { + auto input_mkl_format = input_mkl_shape.GetTfDataFormat(); + auto input_tf_format = MklDnnDataFormatToTFDataFormat(input_mkl_format); + begin_dims = MklDnnDimsInNCHW(begin_dims, input_tf_format); + size_dims = MklDnnDimsInNCHW(size_dims, input_tf_format); + } + + // Initialize input dimensions and strides to be used when input is not in + // MklDnn layout. + memory::dims input_dims, input_strides; + if (!input_mkl_shape.IsMklTensor()) { + input_dims = TFShapeToMklDnnDims(input_tensor.shape()); + input_strides = CalculateTFStrides(input_dims); + } + + // Create input memory descriptor. + auto input_md = + input_mkl_shape.IsMklTensor() + ? input_mkl_shape.GetMklLayout() + : MklDnnData::CreateBlockedMemDesc(input_dims, input_strides); + src.SetUsrMem(input_md, &input_tensor); + + // Step 2 - create view primitive descriptor + auto view_pd = + view::primitive_desc(src.GetUsrMemPrimDesc(), size_dims, begin_dims) + .dst_primitive_desc(); + auto output_strides = CalculateTFStrides(size_dims); + auto output_md = + MklDnnData::CreateBlockedMemDesc(size_dims, output_strides); + auto output_pd = memory::primitive_desc(output_md, cpu_engine); + + // Step 3 - Create memory for output. If input is in MklDnn layout, then + // output is also in MklDnn layout. Otherwise, output is in Tensorflow + // layout. + AllocateOutputTensor(context, input_mkl_shape, &output_pd, size_dims, + &output_tensor, &output_mkl_shape); + CHECK_NOTNULL(output_tensor); + CHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor()); + output.SetUsrMem(output_md, output_tensor); + + std::vector net; + // Step 4 - create reorder primitive desc between view_pd and output_pd. + auto reorder_pd = + reorder::primitive_desc(view_pd, output.GetUsrMemPrimDesc()); + // Step 5 - create reorder primitive itself. 
+ net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *output.GetUsrMem())); + stream(stream::kind::eager).submit(net).wait(); + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + void AllocateOutputTensor(OpKernelContext* context, + const MklDnnShape& input_mkl_shape, + memory::primitive_desc* output_pd, + const memory::dims& output_dims, + Tensor** output_tensor, + MklDnnShape* output_mkl_shape) { + CHECK_NOTNULL(output_tensor); + CHECK_NOTNULL(output_mkl_shape); + + TensorShape output_tf_shape; + + if (input_mkl_shape.IsMklTensor()) { + // Since input tensor is in Mkl layout, output tensor will be in Mkl + // layout. + + // Allocate shape of Mkl tensor. + output_mkl_shape->SetMklTensor(true); + output_mkl_shape->SetMklLayout(output_pd); + output_mkl_shape->SetElemType(MklDnnType()); + output_mkl_shape->SetTfLayout(input_mkl_shape.GetDimension(), output_dims, + input_mkl_shape.GetTfDataFormat()); + + output_tf_shape.AddDim((output_pd->get_size() / sizeof(T)) + 1); + } else { + // If input is not in Mkl layout, then output won't be in Mkl layout. 
+ output_mkl_shape->SetMklTensor(false); + output_tf_shape = MklDnnDimsToTFShape(output_dims); + } + + AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape, + *output_mkl_shape); + } +}; + +// MKL-DNN Slice registration +#define REGISTER_MKL_SLICE(type) \ + REGISTER_KERNEL_BUILDER(Name("_MklSlice") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .HostMemory("begin") \ + .HostMemory("size") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklDnnSliceOp); + +TF_CALL_float(REGISTER_MKL_SLICE); +#undef REGISTER_MKL_SLICE + +} // namespace tensorflow + +#endif // INTEL_MKL_DNN +#endif // INTEL_MKL diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 7dbb18aa5d..18cc529a9b 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -1648,6 +1648,60 @@ REGISTER_OP("Slice") return Status::OK(); }); +#ifdef INTEL_MKL +REGISTER_OP("_MklSlice") + .Input("input: T") + .Input("begin: Index") + .Input("size: Index") + .Input("mkl_input: uint8") + .Input("mkl_begin: uint8") + .Input("mkl_size: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("T: type") + .Attr("Index: {int32,int64}") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle input = c->input(0); + ShapeHandle begin_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &begin_shape)); + ShapeHandle sizes_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &sizes_shape)); + + // Merge to check compatibility of begin and sizes tensors. + TF_RETURN_IF_ERROR(c->Merge(begin_shape, sizes_shape, &begin_shape)); + + DimensionHandle ndims = c->Dim(begin_shape, 0); + if (c->ValueKnown(ndims)) { + TF_RETURN_IF_ERROR(c->WithRank(input, c->Value(ndims), &input)); + } + + // NOTE(mrry): Use MakeShapeFromShapeTensor to handle partially-known + // values, even though the `begin` value does not represent a shape. 
+ ShapeHandle begin_value; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value)); + + // NOTE(mrry): We can't use `MakeShapeFromShapeTensor` for `sizes` because + // it might contain -1, which can't be represented -1 in the ShapeHandle + // would meqan "unknown". + const Tensor* sizes_value = c->input_tensor(3); + + if (sizes_value != nullptr) { + TF_RETURN_IF_ERROR( + c->WithRank(begin_value, sizes_value->NumElements(), &begin_value)); + std::vector dims; + // If the begin and sizes tensors are available, then + // we can be precise about the shape of the output. + if (sizes_value->dtype() == DT_INT64) { + TF_RETURN_IF_ERROR( + SliceHelper(c, begin_value, sizes_value, &dims)); + } else { + TF_RETURN_IF_ERROR( + SliceHelper(c, begin_value, sizes_value, &dims)); + } + } + }); +#endif + REGISTER_OP("StridedSlice") .Input("input: T") .Input("begin: Index") -- GitLab From 4644d186e1cc5862d152a6be4df4443c46f76b4a Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Tue, 4 Sep 2018 07:28:44 -0700 Subject: [PATCH 0073/1357] Added the api for the bridge --- third_party/ngraph/ngraph_tf.BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index c1221cc385..a8d1fdc194 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -10,6 +10,8 @@ load( cc_library( name = "ngraph_tf", srcs = [ + "src/ngraph_api.h", + "src/ngraph_api.cc", "src/ngraph_assign_clusters.h", "src/ngraph_assign_clusters.cc", "src/ngraph_builder.h", -- GitLab From a65c6c17d0705fe11be6f33f63a677106bf26ffb Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Tue, 4 Sep 2018 12:34:14 -0700 Subject: [PATCH 0074/1357] Updated the unit test files --- third_party/ngraph/ngraph_tf.BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index a8d1fdc194..7577a4014d 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ 
b/third_party/ngraph/ngraph_tf.BUILD @@ -68,6 +68,8 @@ tf_cc_test( "test/conversions.cpp", "test/padding.cpp", "test/graph_rewrites/assign_clusters.cc", + "test/test_utilities.h", + "test/test_utilities.cpp", "test/main.cpp", ], deps = [ -- GitLab From fa20b59b920233d35bb8da3fbc3c234c369a8291 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 4 Sep 2018 14:20:40 -0700 Subject: [PATCH 0075/1357] Move CUDA-specific occupancy calculation into proper file -Maintain functionality, just move CalculateOccupancy() and CompareOccupancy() methods from device_description to cuda_gpu_executor -Remove CUDA requirement in general class device_description --- .../stream_executor/cuda/cuda_gpu_executor.cc | 37 +++++++++++++++++++ .../stream_executor/cuda/cuda_gpu_executor.h | 11 ++++++ .../stream_executor/device_description.cc | 32 ---------------- .../stream_executor/device_description.h | 17 --------- 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 39b0696c93..458c0e3030 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -490,6 +490,43 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, } } +// Compute and return maximum blocks per core (occupancy) based on the +// device description, some kernel characteristics and the number of threads per +// block. If unable to compute occupancy, zero is returned. 
+int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + return suggested_blocks; +} + +// Compute and return the suggested thread count to acheive ideal occupancy. +// If the provided thread dimensions match this number, zero is returned. +int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + if (suggested_blocks > *initial_blocks) { + *initial_blocks = suggested_blocks; + return suggested_threads; + } else { + return 0; + } +} + void *CUDAExecutor::Allocate(uint64 size) { return CUDADriver::DeviceAllocate(context_, size); } diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index 8a954d5461..e8ebbc3220 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -70,6 +70,17 @@ class CUDAExecutor : public internal::StreamExecutorInterface { const BlockDim &block_dims, const KernelBase &k, const KernelArgsArrayBase &args) override; + int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + + int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 
registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + void *Allocate(uint64 size) override; void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes, diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index df52ce6cce..726c4adf74 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -157,36 +157,4 @@ static uint64 RoundDown(uint64 value, uint64 n) { return port::MathUtil::FloorOfRatio(value, n) * n; } -int CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { - int suggested_blocks = 0; - int suggested_threads = 0; - CUresult err = - cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, - func, NULL, shared_memory_per_block, 0); - CHECK_EQ(err, CUDA_SUCCESS); - return suggested_blocks; -} - -int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { - int suggested_blocks = 0; - int suggested_threads = 0; - CUresult err = - cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, - func, NULL, shared_memory_per_block, 0); - CHECK_EQ(err, CUDA_SUCCESS); - if (suggested_blocks > *initial_blocks) { - *initial_blocks = suggested_blocks; - return suggested_threads; - } else { - return 0; - } -} - } // namespace stream_executor diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index d335b9b875..b15ce31216 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -24,7 +24,6 @@ limitations under the License. 
#include #include "tensorflow/stream_executor/platform/port.h" -#include "tensorflow/stream_executor/cuda/cuda_driver.h" #include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/platform/port.h" @@ -324,22 +323,6 @@ void CalculateDimensionality(const DeviceDescription &device_description, uint64 element_count, uint64 *threads_per_block, uint64 *block_count); -// Compute and return maximum blocks per core (occupancy) based on the -// device description, some kernel characteristics and the number of threads per -// block. If unable to compute occupancy, zero is returned. -int CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); - -// Compute and return the suggested thread count to acheive ideal occupancy. -// If the provided thread dimensions match this number, zero is returned. -int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); - } // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_ -- GitLab From cd6597b8fcd82b51ddb47a297972a1614c2a5d78 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 4 Sep 2018 16:17:40 -0700 Subject: [PATCH 0076/1357] Fixed transition typo --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 458c0e3030..a961e9a6c4 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -493,7 +493,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // Compute and return maximum blocks per core (occupancy) based on the // device description, some 
kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. -int CalculateOccupancy(const DeviceDescription& device_description, +int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, const ThreadDim& thread_dims, CUfunction func) { @@ -508,7 +508,7 @@ int CalculateOccupancy(const DeviceDescription& device_description, // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. -int CompareOccupancy(int* initial_blocks, +int CUDAExecutor::CompareOccupancy(int* initial_blocks, const DeviceDescription& device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, -- GitLab From 475b7715f16ad0f94fa9986a0eefc1b2cf2044bd Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 4 Sep 2018 16:31:01 -0700 Subject: [PATCH 0077/1357] Recommended typo fix --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index a961e9a6c4..ce2f1ce3ae 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -483,7 +483,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread, smem_per_block, thread_dims, cufunc); if (suggested_threads != 0) { - VLOG(2) << "The cuda occupancy calculator reccommends using " + VLOG(2) << "The cuda occupancy calculator recommends using " << suggested_threads << " threads per block to acheive an occupancy of " << blocks_per_sm << " blocks per SM."; -- GitLab From a95281ce1b449d8f92a3799ff9c1dbf661b70bc4 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 5 Sep 2018 09:02:40 +0800 Subject: 
[PATCH 0078/1357] Avoid golden API file changing. --- tensorflow/cc/gradients/nn_grad_test.cc | 3 +-- tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt | 1 + tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index d8c2a1a0fc..f5a09e09dc 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -42,7 +42,6 @@ using ops::MaxPoolV2; using ops::Placeholder; using ops::Relu; using ops::Relu6; -using ops::LeakyRelu; using ops::Selu; using ops::Softmax; using ops::Softplus; @@ -165,7 +164,7 @@ TEST_F(NNGradTest, Relu6Grad) { TEST_F(NNGradTest, LeakyReluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); - auto y = LeakyRelu(scope_, x); + auto y = ops::internal::LeakyRelu(scope_, x); // Avoid input values where Leaky ReLU gradient is not well defined (around // zero). Tensor x_init_value = test::AsTensor( diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt index 4a61889f54..280148e032 100644 --- a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt @@ -1,4 +1,5 @@ op { graph_op_name: "LeakyRelu" + visibility: HIDDEN summary: "Computes rectified linear: `max(features, features * alpha)`." 
} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 9e8d320f06..4de662fe33 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1324,10 +1324,6 @@ tf_module { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "leaky_relu" - argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], " - } member_method { name: "less" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 89979f42e827d9eb5c349259a5aa2ec32d38c86a Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 1 Sep 2018 16:07:46 +0000 Subject: [PATCH 0079/1357] Fix MPI build failure caused by StringPiece -> absl::string_view This fix tries to fix the MPI build failure caused by StringPiece -> absl::string_view. Signed-off-by: Yong Tang --- tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc | 4 ++-- tensorflow/contrib/mpi/mpi_rendezvous_mgr.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc index 6a7f5efecd..e195cca647 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc @@ -136,7 +136,7 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync( MPIRendezvousMgr* mgr = reinterpret_cast(this->rendezvous_mgr_); - mgr->QueueRequest(parsed.FullKey().ToString(), step_id_, + mgr->QueueRequest(string(parsed.FullKey()), step_id_, std::move(request_call), rendezvous_call); } @@ -258,7 +258,7 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request, std::function res = std::bind( send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call); - SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res)); + SendQueueEntry 
req(string(parsed.FullKey()), std::move(res)); this->QueueSendRequest(req); diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h index 5596601ddb..90140fcab3 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h @@ -71,7 +71,7 @@ class MPISendTensorCall { void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id, const bool is_dead) { - mRes_.set_key(parsed.FullKey().ToString()); + mRes_.set_key(string(parsed.FullKey())); mRes_.set_step_id(step_id); mRes_.mutable_response()->set_is_dead(is_dead); mRes_.mutable_response()->set_send_start_micros( -- GitLab From 39e324505c380c9d449dc31d34629a9d470c765f Mon Sep 17 00:00:00 2001 From: Jason Zaman Date: Tue, 4 Sep 2018 15:01:22 +0800 Subject: [PATCH 0080/1357] Add //tensorflow:install_headers target Used to prepare all the header files so they can easily be installed into /usr/include when packaging TF. Signed-off-by: Jason Zaman --- tensorflow/BUILD | 28 ++++++++++++++++++++++++++++ tensorflow/cc/BUILD | 28 ++++++++++++++++++++++++++-- tensorflow/core/BUILD | 19 ++++++++++++++++--- third_party/eigen3/BUILD | 10 ++-------- 4 files changed, 72 insertions(+), 13 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 661cba5ff0..768d4107d8 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -617,3 +617,31 @@ py_library( visibility = ["//visibility:public"], deps = ["//tensorflow/python:no_contrib"], ) + +genrule( + name = "install_headers", + srcs = [ + "//tensorflow/c:headers", + "//tensorflow/c/eager:headers", + "//tensorflow/cc:headers", + "//tensorflow/core:headers", + ], + outs = ["include"], + cmd = """ + mkdir $@ + for f in $(SRCS); do + d="$${f%/*}" + d="$${d#bazel-out*genfiles/}" + d="$${d#*external/eigen_archive/}" + + if [[ $${d} == *local_config_* ]]; then + continue + fi + + mkdir -p "$@/$${d}" + cp "$${f}" "$@/$${d}/" + done + """, + tags = ["manual"], + visibility = 
["//visibility:public"], +) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index f56521dac0..b587e63227 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -10,11 +10,12 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", - "tf_cc_test", + "cc_library_with_android_deps", "tf_cc_binary", + "tf_cc_test", "tf_copts", "tf_gen_op_wrappers_cc", - "cc_library_with_android_deps", + "transitive_hdrs", ) cc_library( @@ -716,3 +717,26 @@ tf_cc_test( "//tensorflow/core:testlib", ], ) + +transitive_hdrs( + name = "headers", + visibility = ["//tensorflow:__subpackages__"], + deps = [ + ":cc_ops", + ":client_session", + ":coordinator", + ":gradient_checker", + ":gradients", + ":ops", + ":queue_runner", + ":remote_fused_graph_ops", + ":scope", + "//tensorflow/cc/profiler", + "//tensorflow/cc/saved_model:constants", + "//tensorflow/cc/saved_model:loader", + "//tensorflow/cc/saved_model:reader", + "//tensorflow/cc/saved_model:signature_constants", + "//tensorflow/cc/saved_model:tag_constants", + "//tensorflow/cc/tools:freeze_saved_model", + ], +) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5c314f359c..d5d4aad541 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -85,11 +85,12 @@ load( "tf_cc_tests", "tf_copts", "tf_cuda_library", + "tf_features_nomodules_if_android", "tf_gen_op_libs", "tf_generate_proto_text_sources", "tf_genrule_cmd_append_to_srcs", "tf_opts_nortti_if_android", - "tf_features_nomodules_if_android", + "transitive_hdrs", ) load("//tensorflow:tensorflow.bzl", "tf_cc_test_mkl") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") @@ -120,16 +121,16 @@ load( "tf_additional_libdevice_srcs", "tf_additional_minimal_lib_srcs", "tf_additional_mpi_lib_defines", - "tf_additional_proto_hdrs", "tf_additional_proto_compiler_hdrs", + "tf_additional_proto_hdrs", "tf_additional_proto_srcs", "tf_additional_test_deps", "tf_additional_test_srcs", "tf_additional_verbs_lib_defines", "tf_jspb_proto_library", 
"tf_kernel_tests_linkstatic", - "tf_lib_proto_parsing_deps", "tf_lib_proto_compiler_deps", + "tf_lib_proto_parsing_deps", "tf_nano_proto_library", "tf_platform_hdrs", "tf_platform_srcs", @@ -4691,6 +4692,18 @@ cc_library( ] + tf_additional_libdevice_deps(), ) +transitive_hdrs( + name = "headers", + visibility = ["//tensorflow:__subpackages__"], + deps = [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:stream_executor", + ], +) + # ----------------------------------------------------------------------------- # Google-internal targets go here (must be at the end). diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index 203991b50f..f072f2545a 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -66,19 +66,13 @@ genrule( outs = ["include"], cmd = """ mkdir $@ - for f in $(locations @eigen_archive//:eigen_header_files) ; do + for f in $(SRCS); do d="$${f%/*}" d="$${d#*external/eigen_archive/}" mkdir -p "$@/$${d}" cp "$${f}" "$@/$${d}/" done - - for f in $(locations :eigen_third_party_header_files) ; do - d="$${f%/*}" - - mkdir -p "$@/$${d}" - cp "$${f}" "$@/$${d}/" - done """, + tags = ["manual"], ) -- GitLab From 69d3b8faf41791834301a74a05e288964940427d Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Fri, 22 Jun 2018 23:09:43 -0500 Subject: [PATCH 0081/1357] [ROCm] bazel build system and continuous integration logic The commit contains following components to support TensorFlow on ROCm platform - bazel build system - continuous integration logic Authors: - Jack Chung: jack.chung@amd.com - Jeffrey Poznanovic: Jeffrey.Poznanovic@amd.com - Peng Sun: Peng.Sun@amd.com --- configure.py | 20 + tensorflow/core/BUILD | 4 +- tensorflow/core/kernels/BUILD | 3 +- tensorflow/tensorflow.bzl | 67 +- tensorflow/tools/ci_build/Dockerfile.rocm | 97 +++ .../tools/ci_build/builds/docker_test.sh | 9 +- tensorflow/tools/ci_build/builds/pip.sh 
| 4 +- .../tools/ci_build/builds/with_the_same_user | 6 + tensorflow/tools/ci_build/ci_build.sh | 11 +- .../tools/ci_build/linux/cpu/run_cc_core.sh | 1 + .../tools/ci_build/linux/cpu/run_py2_core.sh | 1 + .../ci_build/linux/cpu/run_py3_contrib.sh | 1 + .../tools/ci_build/linux/cpu/run_py3_core.sh | 1 + .../tools/ci_build/linux/libtensorflow.sh | 3 + .../tools/ci_build/linux/libtensorflow_cpu.sh | 1 + .../ci_build/linux/libtensorflow_docker.sh | 6 + .../ci_build/linux/libtensorflow_rocm.sh | 22 + .../tools/ci_build/linux/rocm/run_cc_core.sh | 39 ++ .../tools/ci_build/linux/rocm/run_py3_core.sh | 39 ++ .../tools/ci_build/osx/cpu/run_py2_cc_core.sh | 1 + .../tools/ci_build/osx/libtensorflow_cpu.sh | 1 + .../tools/ci_build/osx/libtensorflow_gpu.sh | 1 + .../tools/ci_build/osx/libtensorflow_rocm.sh | 36 + .../tools/ci_build/xla/linux/rocm/run_py3.sh | 41 ++ tensorflow/workspace.bzl | 2 + .../gpus/crosstool/CROSSTOOL_hipcc.tpl | 158 +++++ .../bin/crosstool_wrapper_driver_rocm.tpl | 241 +++++++ third_party/gpus/rocm/BUILD | 0 third_party/gpus/rocm/BUILD.tpl | 99 +++ third_party/gpus/rocm/build_defs.bzl.tpl | 32 + third_party/gpus/rocm/rocm_config.h.tpl | 21 + third_party/gpus/rocm_configure.bzl | 663 ++++++++++++++++++ tools/bazel.rc | 3 + 33 files changed, 1611 insertions(+), 23 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rocm create mode 100755 tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh create mode 100755 tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh create mode 100755 tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh create mode 100644 third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl create mode 100755 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl create mode 100644 third_party/gpus/rocm/BUILD create mode 100644 third_party/gpus/rocm/BUILD.tpl create mode 100644 
third_party/gpus/rocm/build_defs.bzl.tpl create mode 100644 third_party/gpus/rocm/rocm_config.h.tpl create mode 100644 third_party/gpus/rocm_configure.bzl diff --git a/configure.py b/configure.py index 361bd4764d..4f998511aa 100644 --- a/configure.py +++ b/configure.py @@ -1521,6 +1521,13 @@ def main(): else: set_trisycl_include_dir(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False) + if (environ_cp.get('TF_NEED_ROCM') == '1' and + 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( + 'LD_LIBRARY_PATH') != '1'): + write_action_env_to_bazelrc('LD_LIBRARY_PATH', + environ_cp.get('LD_LIBRARY_PATH')) + set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): @@ -1561,6 +1568,19 @@ def main(): write_to_bazelrc('build --config=download_clang') write_to_bazelrc('test --config=download_clang') + # SYCL / ROCm / CUDA are mutually exclusive. + # At most 1 GPU platform can be configured. + gpu_platform_count = 0 + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_ROCM') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_CUDA') == '1': + gpu_platform_count += 1 + if gpu_platform_count >= 2: + raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. 
' + 'At most 1 GPU platform can be configured.') + set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) if environ_cp.get('TF_NEED_MPI') == '1': set_mpi_home(environ_cp) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c06fea130f..d5dfb8c813 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -146,7 +146,7 @@ load( "if_static", "tf_cuda_tests_tags", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured") load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library") load( "//third_party/mkl:build_defs.bzl", @@ -2941,7 +2941,7 @@ tf_cuda_library( "platform/device_tracer.h", ], copts = tf_copts(), - cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(), + cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()), visibility = ["//visibility:private"], deps = [ ":core_cpu_internal", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 25063ac823..68fa8fa481 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -55,7 +55,8 @@ load( "if_mkl_ml", "mkl_deps", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured") +load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm", "if_rocm_is_configured") config_setting( # Add "--define tensorflow_xsmm=1" to your build command to use libxsmm for diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index adac895a17..f51a628ca3 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -17,8 +17,15 @@ load( ) load( "@local_config_cuda//cuda:build_defs.bzl", - "cuda_default_copts", "if_cuda", + "if_cuda_is_configured", + "cuda_default_copts", +) +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", + 
"if_rocm_is_configured", + "rocm_default_copts", ) load( "//third_party/mkl:build_defs.bzl", @@ -860,12 +867,14 @@ def tf_cuda_only_cc_test( srcs = srcs + tf_binary_additional_srcs(), size = size, args = args, - copts = _cuda_copts() + tf_copts(), + copts = _cuda_copts() + _rocm_copts() + tf_copts(), data = data + tf_binary_dynamic_kernel_dsos(kernels), - deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([ - clean_dep("//tensorflow/core:cuda"), - clean_dep("//tensorflow/core:gpu_lib"), - ]), + deps = deps + tf_binary_dynamic_kernel_deps(kernels) + + if_cuda_is_configured([ + clean_dep("//tensorflow/core:cuda"), + clean_dep("//tensorflow/core:gpu_lib")]) + + if_rocm_is_configured([ + clean_dep("//tensorflow/core:gpu_lib")]), linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name), linkstatic = linkstatic or select({ # cc_tests with ".so"s in srcs incorrectly link on Darwin @@ -1000,7 +1009,7 @@ register_extension_info( label_regex_for_dep = "{extension_name}", ) -def _cuda_copts(): +def _cuda_copts(opts = []): """Gets the appropriate set of copts for (maybe) CUDA compilation. If we're doing CUDA compilation, returns copts for our particular CUDA @@ -1016,13 +1025,31 @@ def _cuda_copts(): "@local_config_cuda//cuda:using_clang": ([ "-fcuda-flush-denormals-to-zero", ]), - }) + }) + if_cuda_is_configured(opts) + +def _rocm_copts(opts = []): + """Gets the appropriate set of copts for (maybe) ROCm compilation. + + If we're doing ROCm compilation, returns copts for our particular ROCm + compiler. If we're not doing ROCm compilation, returns an empty list. + + """ + return rocm_default_copts() + select({ + "//conditions:default": [], + "@local_config_rocm//rocm:using_hipcc": ([ + "", + ]) + }) + if_rocm_is_configured(opts) # Build defs for TensorFlow kernels # When this target is built using --config=cuda, a cc_library is built # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional # libraries needed by GPU kernels. 
+# +# When this target is built using --config=rocm, a cc_library is built +# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional +# libraries needed by GPU kernels. def tf_gpu_kernel_library( srcs, copts = [], @@ -1030,16 +1057,18 @@ def tf_gpu_kernel_library( deps = [], hdrs = [], **kwargs): - copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts() + copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + _rocm_copts(opts = cuda_copts) kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] native.cc_library( srcs = srcs, hdrs = hdrs, copts = copts, - deps = deps + if_cuda([ + deps = deps + if_cuda_is_configured([ clean_dep("//tensorflow/core:cuda"), clean_dep("//tensorflow/core:gpu_lib"), + ]) + if_rocm_is_configured([ + clean_dep("//tensorflow/core:gpu_lib"), ]), alwayslink = 1, **kwargs @@ -1075,11 +1104,13 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs) kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] native.cc_library( - deps = deps + if_cuda(cuda_deps + [ + deps = deps + if_cuda_is_configured(cuda_deps + [ clean_dep("//tensorflow/core:cuda"), - "@local_config_cuda//cuda:cuda_headers", + "@local_config_cuda//cuda:cuda_headers" + ]) + if_rocm_is_configured(cuda_deps + [ + "@local_config_rocm//rocm:rocm_headers" ]), - copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) + + copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"])), **kwargs @@ -1459,6 +1490,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ "@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cudart_static", ] + rocm_deps = [ + clean_dep("//tensorflow/core:stream_executor_headers_lib"), + ] deps = deps + tf_custom_op_library_additional_deps() if gpu_srcs: basename = 
name.split(".")[0] @@ -1467,13 +1501,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ srcs = gpu_srcs, copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]), features = if_cuda(["-use_header_modules"]), - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps) ) cuda_deps.extend([":" + basename + "_gpu"]) + rocm_deps.extend([":" + basename + "_gpu"]) check_deps( name = name + "_check_deps", - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps), disallowed_deps = [ clean_dep("//tensorflow/core:framework"), clean_dep("//tensorflow/core:lib"), @@ -1482,7 +1517,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ tf_cc_shared_object( name = name, srcs = srcs, - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps), data = if_static([name + "_check_deps"]), copts = tf_copts(is_external = True), features = ["windows_export_all_symbols"], diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm new file mode 100644 index 0000000000..aadaa8bac1 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -0,0 +1,97 @@ +# This Dockerfile provides a starting point for a ROCm installation of +# MIOpen and tensorflow. 
+FROM ubuntu:xenial +MAINTAINER Jeff Poznanovic + +ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/ +ARG ROCM_PATH=/opt/rocm + +ENV DEBIAN_FRONTEND noninteractive +ENV TF_NEED_ROCM 1 +ENV HOME /root/ +RUN apt update && apt install -y wget software-properties-common + +# Add rocm repository +RUN apt-get clean all +RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - +RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list" + +# Install misc pkgs +RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + clang-3.8 \ + clang-format-3.8 \ + clang-tidy-3.8 \ + cmake \ + cmake-qt-gui \ + ssh \ + curl \ + apt-utils \ + pkg-config \ + g++-multilib \ + git \ + libunwind-dev \ + libfftw3-dev \ + libelf-dev \ + libncurses5-dev \ + libpthread-stubs0-dev \ + vim \ + gfortran \ + libboost-program-options-dev \ + libssl-dev \ + libboost-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + rpm \ + libnuma-dev \ + virtualenv \ + python-pip \ + python3-pip \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install rocm pkgs +RUN apt-get update --allow-insecure-repositories && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ + rocm-dev rocm-libs rocm-utils \ + rocfft miopen-hip miopengemm rocblas hipblas rocrand \ + rocm-profiler cxlactivitylogger && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git +RUN cd ~/HIP && mkdir -p build && cd build && cmake .. 
&& make package -j && dpkg -i *.deb + +ENV HCC_HOME=$ROCM_PATH/hcc +ENV HIP_PATH=$ROCM_PATH/hip +ENV OPENCL_ROOT=$ROCM_PATH/opencl +ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}" +ENV PATH="$ROCM_PATH/bin:${PATH}" +ENV PATH="$OPENCL_ROOT/bin:${PATH}" + +# Add target file to help determine which device(s) to build for +RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst + +# Setup environment variables, and add those environment variables at the end of ~/.bashrc +ARG HCC_HOME=/opt/rocm/hcc +ARG HIP_PATH=/opt/rocm/hip +ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH + +# Copy and run the install scripts. +COPY install/*.sh /install/ +ARG DEBIAN_FRONTEND=noninteractive +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh +RUN /install/install_pip_packages.sh +RUN /install/install_bazel.sh +RUN /install/install_golang.sh + +# Set up the master bazelrc configuration file. +COPY install/.bazelrc /etc/bazel.bazelrc + +# Configure the build for our CUDA configuration. 
+ENV TF_NEED_ROCM 1 + diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh index e337ea4b05..38891b60e5 100755 --- a/tensorflow/tools/ci_build/builds/docker_test.sh +++ b/tensorflow/tools/ci_build/builds/docker_test.sh @@ -19,7 +19,7 @@ # # Usage: docker_test.sh # Arguments: -# IMAGE_TYPE : Type of the image: (CPU|GPU) +# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM) # TAG : Docker image tag # WHL_PATH : Path to the whl file to be installed inside the docker image # @@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then DOCKERFILE="tensorflow/tools/docker/Dockerfile" elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu" +elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then + DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm" else die "Unrecognized image type: $1" fi @@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') GPU_EXTRA_PARAMS="${devices} ${libs}" +elif [ "${IMAGE_TYPE}" == "rocm" ]; then + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" else GPU_EXTRA_PARAMS="" + ROCM_EXTRA_PARAMS="" fi # Run docker image with source directory mapped docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \ -${GPU_EXTRA_PARAMS} \ +${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \ "${DOCKER_IMG_TAG}" \ /bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\ "tensorflow/tools/ci_build/builds/test_tutorials.sh && "\ diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index fef121ab5a..6543779022 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}" PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" GPU_FLAG="" if [[ ${CONTAINER_TYPE} == 
"cpu" ]] || \ + [[ ${CONTAINER_TYPE} == "rocm" ]] || \ [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \ die "Build failed." @@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then die "ERROR: Cannot find repaired wheel." fi # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so - elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then + elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \ + [[ ${CONTAINER_TYPE} == "rocm" ]]; then WHL_PATH=${AUDITED_WHL_NAME} cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH} echo "Copied manylinx1 wheel file at ${WHL_PATH}" diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index b216e3549f..1cc5aed15d 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ usermod -a -G sudo "${CI_BUILD_USER}" echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo +if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then + # ROCm requires the video group in order to use the GPU for compute. If it + # exists on the host, add it to the container. + getent group video || addgroup video && adduser "${CI_BUILD_USER}" video +fi + if [ -e /root/.bazelrc ]; then cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc" chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc" diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 77265e0f50..eab0616513 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -18,7 +18,7 @@ # # # CONTAINER_TYPE: Type of the docker container used the run the build: -# e.g., (cpu | gpu | android | tensorboard) +# e.g., (cpu | gpu | rocm | android | tensorboard) # # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. 
# If this optional value is not supplied (via the @@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then GPU_EXTRA_PARAMS="" fi +# Add extra params for rocm devices and libraries for ROCm container. +if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" +else + ROCM_EXTRA_PARAMS="" +fi + + # Determine the docker image name DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}" @@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ + ${ROCM_EXTRA_PARAMS} \ ${CI_DOCKER_EXTRA_PARAMS[@]} \ "${DOCKER_IMG_NAME}" \ ${CI_COMMAND_PREFIX[@]} \ diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh index 8eeddcdb82..3b5c92d148 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' # Only running cc tests, python version does not matter. export PYTHON_BIN_PATH=`which python` diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh index 8eca1987f0..52eff6330f 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python2` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh index f6fa9251d4..d12027599a 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh @@ -26,6 +26,7 @@ echo "" # Run configure. 
export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python3` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh index 51eb2cd7e6..7c531a4d68 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python3` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/libtensorflow.sh b/tensorflow/tools/ci_build/linux/libtensorflow.sh index beef8e063b..3b6e15feb9 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh @@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-" if [ "${TF_NEED_CUDA}" == "1" ]; then SUFFIX="-gpu-linux-" fi +if [ "${TF_NEED_ROCM}" == "1" ]; then + SUFFIX="-rocm-linux-" +fi build_libtensorflow_tarball "${SUFFIX}$(uname -m)" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh index 4bf34dd299..b76262b6e9 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh @@ -19,4 +19,5 @@ set -ex SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 "${SCRIPT_DIR}/libtensorflow_docker.sh" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index 60c974c36b..467b8dc808 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then DOCKER_BINARY="nvidia-docker" DOCKER_FILE="Dockerfile.gpu" fi +if [ "${TF_NEED_ROCM}" == "1" ]; then + 
DOCKER_IMAGE="tf-tensorflow-rocm" + DOCKER_BINARY="docker" + DOCKER_FILE="Dockerfile.rocm" +fi docker build \ -t "${DOCKER_IMAGE}" \ @@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \ + -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \ -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh new file mode 100755 index 0000000000..c1ebbe3630 --- /dev/null +++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script to build a binary releases of libtensorflow with GPU support. + +set -ex +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +export TF_NEED_ROCM=1 +"${SCRIPT_DIR}/libtensorflow_docker.sh" diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh new file mode 100755 index 0000000000..200089f90e --- /dev/null +++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` +export CC_OPT_FLAGS='-mavx' + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py + +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ + --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \ + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh new file mode 100755 index 0000000000..1d0b838c1b --- /dev/null +++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` +export CC_OPT_FLAGS='-mavx' + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py + +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ + --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \ + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh index c7cc16e669..adee0d3171 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh @@ -27,6 +27,7 @@ echo "" # Run configure. 
export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index 9ae5fc6bea..06798adc03 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh" export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index d95fcdeb85..95f1992d7d 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -27,6 +27,7 @@ export TF_NEED_CUDA=1 export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}" export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 +export TF_NEED_ROCM=0 export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh new file mode 100755 index 0000000000..aeabc0e39e --- /dev/null +++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script to produce binary release of libtensorflow (C API, Java jars etc.). + +set -ex +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# See comments at the top of this file for details. +source "${SCRIPT_DIR}/../builds/libtensorflow.sh" + +# Configure script +export TF_NEED_ROCM=1 +export PYTHON_BIN_PATH="/usr/bin/python" +export TF_NEED_GCP=0 +export TF_NEED_HDFS=0 +export TF_NEED_CUDA=0 +export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_MKL=0 +export COMPUTECPP_PATH="/usr/local" + +export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +build_libtensorflow_tarball "-gpu-darwin-$(uname -m)" diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh new file mode 100755 index 0000000000..a0de128020 --- /dev/null +++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py +echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc + +bazel clean +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \ + --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 \ + --config=xla -- \ + //tensorflow/compiler/... diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 1e7c5d6790..87d1243563 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,6 +1,7 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. 
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") load("//third_party:nccl/nccl_configure.bzl", "nccl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") @@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): sycl_configure(name = "local_config_sycl") syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") + rocm_configure(name="local_config_rocm") initialize_third_party() diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl new file mode 100644 index 0000000000..0e175b3ef6 --- /dev/null +++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl @@ -0,0 +1,158 @@ +major_version: "local" +minor_version: "" +default_target_cpu: "same_as_host" + +default_toolchain { + cpu: "k8" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "piii" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "arm" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "ppc" + toolchain_identifier: "local_linux" +} + +toolchain { + abi_version: "local" + abi_libc_version: "local" + builtin_sysroot: "" + compiler: "compiler" + host_system_name: "local" + needsPic: true + supports_gold_linker: false + supports_incremental_linker: false + supports_fission: false + supports_interface_shared_objects: false + supports_normalizing_ar: false + supports_start_end_lib: false + supports_thin_archives: false + target_libc: "local" + target_cpu: "local" + target_system_name: "local" + toolchain_identifier: "local_linux" + + tool_path { name: "ar" path: "/usr/bin/ar" } + tool_path { name: "compat-ld" path: "/usr/bin/ld" } + tool_path { name: "cpp" path: "/usr/bin/cpp" } + tool_path { name: "dwp" path: "/usr/bin/dwp" } + # As part of the TensorFlow release, we place some 
ROCm-related compilation + # files in @local_config_rocm//crosstool/clang/bin, and this relative + # path, combined with the rest of our Bazel configuration causes our + # compilation to use those files. + tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" } + # Use "-std=c++11" for hipcc. For consistency, force both the host compiler + # and the device compiler to use "-std=c++11". + cxx_flag: "-std=c++11" + linker_flag: "-Wl,-no-as-needed" + linker_flag: "-lstdc++" + #linker_flag: "-B/usr/bin/" + linker_flag: "-B/opt/rocm/hcc/compiler/bin" + +%{host_compiler_includes} + tool_path { name: "gcov" path: "/usr/bin/gcov" } + + # C(++) compiles invoke the compiler (as that is the one knowing where + # to find libraries), but we provide LD so other rules can invoke the linker. + tool_path { name: "ld" path: "/usr/bin/ld" } + + tool_path { name: "nm" path: "/usr/bin/nm" } + tool_path { name: "objcopy" path: "/usr/bin/objcopy" } + objcopy_embed_flag: "-I" + objcopy_embed_flag: "binary" + tool_path { name: "objdump" path: "/usr/bin/objdump" } + tool_path { name: "strip" path: "/usr/bin/strip" } + + # Anticipated future default. + unfiltered_cxx_flag: "-no-canonical-prefixes" + + # Make C++ compilation deterministic. Use linkstamping instead of these + # compiler symbols. + unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" + unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" + unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__" + # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers + # It needs to be always set when compiling Eigen headers + # (irrespective of whether the source file is being compiled via HIPCC) + # so adding -DEIGEN_USE_HIP as a default CXX flag here + unfiltered_cxx_flag: "-DEIGEN_USE_HIP" + + + # Security hardening on by default. + # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases. 
+ # We need to undef it before redefining it as some distributions now have + # it enabled by default. + #compiler_flag: "-U_FORTIFY_SOURCE" + #compiler_flag: "-D_FORTIFY_SOURCE=1" + #compiler_flag: "-fstack-protector" + #compiler_flag: "-fPIE" + #linker_flag: "-pie" + #linker_flag: "-Wl,-z,relro,-z,now" + + # Enable coloring even if there's no attached terminal. Bazel removes the + # escape sequences if --nocolor is specified. This isn't supported by gcc + # on Ubuntu 14.04. + # compiler_flag: "-fcolor-diagnostics" + + # All warnings are enabled. Maybe enable -Werror as well? + compiler_flag: "-Wall" + # Enable a few more warnings that aren't part of -Wall. + compiler_flag: "-Wunused-but-set-parameter" + # But disable some that are problematic. + compiler_flag: "-Wno-free-nonheap-object" # has false positives + + # Keep stack frames for debugging, even in opt mode. + compiler_flag: "-fno-omit-frame-pointer" + + # Anticipated future default. + linker_flag: "-no-canonical-prefixes" + unfiltered_cxx_flag: "-fno-canonical-system-headers" + # Have gcc return the exit code from ld. + linker_flag: "-pass-exit-codes" + # Stamp the binary with a unique identifier. + linker_flag: "-Wl,--build-id=md5" + linker_flag: "-Wl,--hash-style=gnu" + # Gold linker only? Can we enable this by default? + # linker_flag: "-Wl,--warn-execstack" + # linker_flag: "-Wl,--detect-odr-violations" + + # Include directory for ROCm headers. +%{rocm_include_path} + + compilation_mode_flags { + mode: DBG + # Enable debug symbols. + compiler_flag: "-g" + } + compilation_mode_flags { + mode: OPT + + # No debug symbols. + # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or + # even generally? However, that can't happen here, as it requires special + # handling in Bazel. + compiler_flag: "-g0" + + # Conservative choice for -O + # -O3 can increase binary size and even slow down the resulting binaries. + # Profile first and / or use FDO if you need better performance than this. 
+ compiler_flag: "-O2" + + # Disable assertions + compiler_flag: "-DNDEBUG" + + # Removal of unused code and data at link time (can this increase binary size in some cases?). + compiler_flag: "-ffunction-sections" + compiler_flag: "-fdata-sections" + linker_flag: "-Wl,--gc-sections" + } + linking_mode_flags { mode: DYNAMIC } +} diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl new file mode 100755 index 0000000000..824238022b --- /dev/null +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -0,0 +1,241 @@ +#!/usr/bin/env python +"""Crosstool wrapper for compiling ROCm programs. + +SYNOPSIS: + crosstool_wrapper_driver_rocm [options passed in by cc_library() + or cc_binary() rule] + +DESCRIPTION: + This script is expected to be called by the cc_library() or cc_binary() bazel + rules. When the option "-x rocm" is present in the list of arguments passed + to this script, it invokes the hipcc compiler. Most arguments are passed + as is as a string to --compiler-options of hipcc. When "-x rocm" is not + present, this wrapper invokes gcc with the input arguments as is. +""" + +from __future__ import print_function + +__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)' + +from argparse import ArgumentParser +import os +import subprocess +import re +import sys +import pipes + +# Template values set by rocm_configure.bzl. +CPU_COMPILER = ('%{cpu_compiler}') +GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') + +HIPCC_PATH = '%{hipcc_path}' +PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) + +def Log(s): + print('gpus/crosstool: {0}'.format(s)) + + +def GetOptionValue(argv, option): + """Extract the list of values for option from the argv list. + + Args: + argv: A list of strings, possibly the argv passed to main(). + option: The option whose value to extract, without the leading '-'. 
+
+  Returns:
+    A list of values, either directly following the option,
+    (eg., -opt val1 val2) or values collected from multiple occurrences of
+    the option (eg., -opt val1 -opt val2).
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-' + option, nargs='*', action='append')
+  args, _ = parser.parse_known_args(argv)
+  if not args or not vars(args)[option]:
+    return []
+  else:
+    return sum(vars(args)[option], [])
+
+
+def GetHostCompilerOptions(argv):
+  """Collect the -isystem, -iquote, -g and --sysroot option values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be used as the --compiler-options to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-isystem', nargs='*', action='append')
+  parser.add_argument('-iquote', nargs='*', action='append')
+  parser.add_argument('--sysroot', nargs=1)
+  parser.add_argument('-g', nargs='*', action='append')
+  parser.add_argument('-fno-canonical-system-headers', action='store_true')
+
+  args, _ = parser.parse_known_args(argv)
+
+  opts = ''
+
+  if args.isystem:
+    opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
+  if args.iquote:
+    opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
+  if args.g:
+    opts += ' -g' + ' -g'.join(sum(args.g, []))
+  #if args.fno_canonical_system_headers:
+  #  opts += ' -fno-canonical-system-headers'
+  if args.sysroot:
+    opts += ' --sysroot ' + args.sysroot[0]
+
+  return opts
+
+def GetHipccOptions(argv):
+  """Collect the -hipcc_options values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be passed directly to hipcc: every collected value
+    prefixed with '--' and joined by spaces, or '' when none were given.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-hipcc_options', nargs='*', action='append')
+
+  args, _ = parser.parse_known_args(argv)
+
+  if args.hipcc_options:
+    # Forward the values unchanged. This wrapper defines no _update_options()
+    # helper, so calling one here raised NameError whenever -hipcc_options
+    # was actually supplied.
+    options = sum(args.hipcc_options, [])
+    return ' '.join(['--'+a for a in options])
+  return ''
+
+
+def InvokeHipcc(argv, log=False):
+  """Call hipcc with arguments assembled from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    log: True if logging is requested.
+
+  Returns:
+    The return value of calling os.system('hipcc ' + args)
+  """
+
+  host_compiler_options = GetHostCompilerOptions(argv)
+  hipcc_compiler_options = GetHipccOptions(argv)
+  opt_option = GetOptionValue(argv, 'O')
+  m_options = GetOptionValue(argv, 'm')
+  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+  include_options = GetOptionValue(argv, 'I')
+  out_file = GetOptionValue(argv, 'o')
+  depfiles = GetOptionValue(argv, 'MF')
+  defines = GetOptionValue(argv, 'D')
+  defines = ''.join([' -D' + define for define in defines])
+  undefines = GetOptionValue(argv, 'U')
+  undefines = ''.join([' -U' + define for define in undefines])
+  std_options = GetOptionValue(argv, 'std')
+  hipcc_allowed_std_options = ["c++11"]
+  std_options = ''.join([' -std=' + define
+      for define in std_options if define in hipcc_allowed_std_options])
+
+  # The list of source files get passed after the -c option. I don't know of
+  # any other reliable way to just get the list of source files to be compiled.
+  src_files = GetOptionValue(argv, 'c')
+
+  if len(src_files) == 0:
+    return 1
+  if len(out_file) != 1:
+    return 1
+
+  # A missing -O flag or -O0 selects a debug build; any positive level maps
+  # to -O2.
+  try:
+    optimize = len(opt_option) > 0 and int(opt_option[0]) > 0
+  except ValueError:
+    # Non-numeric levels such as -Os or -Ofast cannot be parsed by int() and
+    # used to crash the wrapper; they still request an optimized build.
+    optimize = True
+  opt = ' -O2' if optimize else ' -g'
+
+  includes = (' -I ' + ' -I '.join(include_options)
+              if len(include_options) > 0
+              else '')
+
+  # Unfortunately, there are other options that have -c prefix too.
+  # So allowing only those look like C/C++ files.
+  # Raw string: '\.' in a normal literal is an invalid escape sequence.
+  src_files = [f for f in src_files if
+               re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+  srcs = ' '.join(src_files)
+  out = ' -o ' + out_file[0]
+
+  hipccopts = ' '
+  hipccopts += ' ' + hipcc_compiler_options
+  hipccopts += undefines
+  hipccopts += defines
+  hipccopts += std_options
+  hipccopts += m_options
+
+  if depfiles:
+    # Generate the dependency file
+    depfile = depfiles[0]
+    cmd = (HIPCC_PATH + ' ' + hipccopts +
+           host_compiler_options +
+           ' ' + GCC_HOST_COMPILER_PATH +
+           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
+    if log: Log(cmd)
+    exit_status = os.system(cmd)
+    if exit_status != 0:
+      # os.system() yields a wait status with the exit code in the high byte.
+      # Passed unchanged to sys.exit(), a value such as 256 (exit code 1) is
+      # truncated to 0 and the failure is reported as success.
+      return (exit_status >> 8) or 1
+
+  cmd = (HIPCC_PATH + ' ' + hipccopts +
+         host_compiler_options + ' -fPIC' +
+         ' ' + GCC_HOST_COMPILER_PATH +
+         ' -I .' + opt + includes + ' -c ' + srcs + out)
+
+  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+  # Need to investigate and fix.
+  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+  if log: Log(cmd)
+  exit_status = os.system(cmd)
+  if exit_status != 0:
+    # Decode the wait status so a failed compile always exits non-zero.
+    return (exit_status >> 8) or 1
+  return 0
+
+
+def main():
+  # ignore PWD env var
+  os.environ['PWD']=''
+
+  parser = ArgumentParser()
+  parser.add_argument('-x', nargs=1)
+  parser.add_argument('--rocm_log', action='store_true')
+  parser.add_argument('-pass-exit-codes', action='store_true')
+  args, leftover = parser.parse_known_args(sys.argv[1:])
+
+  if args.x and args.x[0] == 'rocm':
+    if args.rocm_log: Log('-x rocm')
+    leftover = [pipes.quote(s) for s in leftover]
+    if args.rocm_log: Log('using hipcc')
+    return InvokeHipcc(leftover, log=args.rocm_log)
+
+  # XXX use hipcc to link
+  if args.pass_exit_codes:
+    gpu_compiler_flags = [flag for flag in sys.argv[1:]
+                          if not flag.startswith(('-pass-exit-codes'))]
+
+    # special handling for $ORIGIN
+    # - guard every argument with ''
+    modified_gpu_compiler_flags = []
+    for flag in gpu_compiler_flags:
+      modified_gpu_compiler_flags.append("'" + flag + "'")
+
+    if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
+    return
subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags) + + # Strip our flags before passing through to the CPU compiler for files which + # are not -x rocm. We can't just pass 'leftover' because it also strips -x. + # We not only want to pass -x to the CPU compiler, but also keep it in its + # relative location in the argv list (the compiler is actually sensitive to + # this). + cpu_compiler_flags = [flag for flag in sys.argv[1:] + if not flag.startswith(('--rocm_log'))] + + # XXX: SE codes need to be built with gcc, but need this macro defined + cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__") + + return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl new file mode 100644 index 0000000000..8258bb3589 --- /dev/null +++ b/third_party/gpus/rocm/BUILD.tpl @@ -0,0 +1,99 @@ +licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "using_hipcc", + values = { + "define": "using_rocm_hipcc=true", + }, +) + +cc_library( + name = "rocm_headers", + hdrs = [ + "rocm/rocm_config.h", + %{rocm_headers} + ], + includes = [ + ".", + "rocm/include", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "hip", + srcs = ["rocm/lib/%{hip_lib}"], + data = ["rocm/lib/%{hip_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocblas", + srcs = ["rocm/lib/%{rocblas_lib}"], + data = ["rocm/lib/%{rocblas_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocfft", + srcs = ["rocm/lib/%{rocfft_lib}"], + data = ["rocm/lib/%{rocfft_lib}"], + includes = [ 
+ ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "hiprand", + srcs = ["rocm/lib/%{hiprand_lib}"], + data = ["rocm/lib/%{hiprand_lib}"], + includes = [ + ".", + "rocm/include", + "rocm/include/rocrand", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "miopen", + srcs = ["rocm/lib/%{miopen_lib}"], + data = ["rocm/lib/%{miopen_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocm", + visibility = ["//visibility:public"], + deps = [ + ":rocm_headers", + ":hip", + ":rocblas", + ":rocfft", + ":hiprand", + ":miopen", + ], +) + +%{rocm_include_genrules} diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl new file mode 100644 index 0000000000..306f57551f --- /dev/null +++ b/third_party/gpus/rocm/build_defs.bzl.tpl @@ -0,0 +1,32 @@ +# Macros for building ROCm code. +def if_rocm(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with ROCm. + + Returns a select statement which evaluates to if_true if we're building + with ROCm enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + "@local_config_rocm//rocm:using_hipcc": if_true, + "//conditions:default": if_false + }) + + +def rocm_default_copts(): + """Default options for all ROCm compilations.""" + return if_rocm(["-x", "rocm"] + %{rocm_extra_copts}) + + +def rocm_is_configured(): + """Returns true if ROCm was enabled during the configure process.""" + return %{rocm_is_configured} + +def if_rocm_is_configured(x): + """Tests if the ROCm was enabled during the configure process. + + Unlike if_rocm(), this does not require that we are building with + --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries. 
+ """ + if rocm_is_configured(): + return x + return [] diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl new file mode 100644 index 0000000000..c5f25a845c --- /dev/null +++ b/third_party/gpus/rocm/rocm_config.h.tpl @@ -0,0 +1,21 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef ROCM_ROCM_CONFIG_H_ +#define ROCM_ROCM_CONFIG_H_ + +#define TF_ROCM_TOOLKIT_PATH "/opt/rocm" + +#endif // ROCM_ROCM_CONFIG_H_ diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl new file mode 100644 index 0000000000..9371e33f97 --- /dev/null +++ b/third_party/gpus/rocm_configure.bzl @@ -0,0 +1,663 @@ +# -*- Python -*- +"""Repository rule for ROCm autoconfiguration. + +`rocm_configure` depends on the following environment variables: + + * `TF_NEED_ROCM`: Whether to enable building with ROCm. + * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path + * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is + `/opt/rocm`. + * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then + use the system default. + * `TF_MIOPEN_VERSION`: The version of the MIOpen library. + * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is + `gfx803,gfx900`. 
+""" + +_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" +_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH" +_TF_ROCM_VERSION = "TF_ROCM_VERSION" +_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION" +_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" +_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO" + +_DEFAULT_ROCM_VERSION = "" +_DEFAULT_MIOPEN_VERSION = "" +_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm" +_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"] + +def find_cc(repository_ctx): + """Find the C++ compiler.""" + # Return a dummy value for GCC detection here to avoid error + target_cc_name = "gcc" + cc_path_envvar = _GCC_HOST_COMPILER_PATH + cc_name = target_cc_name + + if cc_path_envvar in repository_ctx.os.environ: + cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip() + if cc_name_from_env: + cc_name = cc_name_from_env + if cc_name.startswith("/"): + # Absolute path, maybe we should make this supported by our which function. + return cc_name + cc = repository_ctx.which(cc_name) + if cc == None: + fail(("Cannot find {}, either correct your path or set the {}" + + " environment variable").format(target_cc_name, cc_path_envvar)) + return cc + +_INC_DIR_MARKER_BEGIN = "#include <...>" + +def _cxx_inc_convert(path): + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + return path + +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp): + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + # TODO: We pass -no-canonical-prefixes here to match the compiler flags, + # but in rocm_clang CROSSTOOL file that is a `feature` and we should + # handle the case when it's disabled and no flag is passed + result = repository_ctx.execute([cc, "-no-canonical-prefixes", + "-E", "-x" + lang, "-", "-v"]) + index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = result.stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = 
result.stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = result.stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = result.stderr[index1 + 1:] + else: + inc_dirs = result.stderr[index1 + 1:index2].strip() + + return [str(repository_ctx.path(_cxx_inc_convert(p))) + for p in inc_dirs.split("\n")] + +def get_cxx_inc_directories(repository_ctx, cc): + """Compute the list of default C and C++ include directories.""" + # For some reason `clang -xc` sometimes returns include paths that are + # different from the ones from `clang -xc++`. (Symlink and a dir) + # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists + includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) + includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) + + includes_cpp_set = depset(includes_cpp) + return includes_cpp + [inc for inc in includes_c + if inc not in includes_cpp_set] + +def auto_configure_fail(msg): + """Output failure message when rocm configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg)) +# END cc_configure common functions (see TODO above). + +def _host_compiler_includes(repository_ctx, cc): + """Generates the cxx_builtin_include_directory entries for gcc inc dirs. + + Args: + repository_ctx: The repository context. + cc: The path to the gcc host compiler. + + Returns: + A string containing the cxx_builtin_include_directory for each of the gcc + host compiler include directories, which can be added to the CROSSTOOL + file. 
+ """ + inc_dirs = get_cxx_inc_directories(repository_ctx, cc) + + # Add numpy headers + inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include") + + entries = [] + for inc_dir in inc_dirs: + entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) + + # define TENSORFLOW_USE_ROCM + entries.append(" unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"") + + return "\n".join(entries) + +def _rocm_include_path(repository_ctx, rocm_config): + """Generates the cxx_builtin_include_directory entries for rocm inc dirs. + + Args: + repository_ctx: The repository context. + cc: The path to the gcc host compiler. + + Returns: + A string containing the cxx_builtin_include_directory for each of the gcc + host compiler include directories, which can be added to the CROSSTOOL + file. + """ + inc_dirs = [] + + # general ROCm include path + inc_dirs.append(rocm_config.rocm_toolkit_path + '/include') + + # Add HSA headers + inc_dirs.append("/opt/rocm/hsa/include") + + # Add HIP headers + inc_dirs.append("/opt/rocm/include/hip") + inc_dirs.append("/opt/rocm/include/hip/hcc_detail") + + # Add rocrand and hiprand headers + inc_dirs.append("/opt/rocm/rocrand/include") + inc_dirs.append("/opt/rocm/hiprand/include") + + # Add rocfft headers + inc_dirs.append("/opt/rocm/rocfft/include") + + # Add rocBLAS headers + inc_dirs.append("/opt/rocm/rocblas/include") + + # Add MIOpen headers + inc_dirs.append("/opt/rocm/miopen/include") + + # Add hcc headers + inc_dirs.append("/opt/rocm/hcc/include") + inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/") + inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include") + # Newer hcc builds use/are based off of clang 8.0.0. 
+  inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/")
+  inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include")
+
+  inc_entries = []
+  for inc_dir in inc_dirs:
+    inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir)
+  return "\n".join(inc_entries)
+
+def _enable_rocm(repository_ctx):
+  """Returns True only when the TF_NEED_ROCM environment variable is "1"."""
+  if "TF_NEED_ROCM" in repository_ctx.os.environ:
+    enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip()
+    return enable_rocm == "1"
+  return False
+
+def _rocm_toolkit_path(repository_ctx):
+  """Finds the rocm toolkit directory.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A speculative real path of the rocm toolkit install directory.
+  """
+  rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH
+  if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ:
+    rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip()
+  if not repository_ctx.path(rocm_toolkit_path).exists:
+    auto_configure_fail("Cannot find rocm toolkit path.")
+  return str(repository_ctx.path(rocm_toolkit_path).realpath)
+
+def _amdgpu_targets(repository_ctx):
+  """Returns a list of strings representing AMDGPU targets.
+
+  Reads the comma-separated TF_ROCM_AMDGPU_TARGETS environment variable and
+  falls back to the default target list when it is not set. Each entry must
+  be "gfx" followed by digits; anything else aborts configuration.
+  """
+  if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ:
+    return _DEFAULT_ROCM_AMDGPU_TARGETS
+  amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS]
+  # Strip surrounding whitespace, as the other environment readers do, so
+  # that "gfx803, gfx900" is accepted.
+  amdgpu_targets = [t.strip() for t in amdgpu_targets_str.split(",")]
+  for amdgpu_target in amdgpu_targets:
+    if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit():
+      auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
+  return amdgpu_targets
+
+def _cpu_value(repository_ctx):
+  """Returns the name of the host operating system.
+
+  Args:
+    repository_ctx: The repository context.
+
+  Returns:
+    A string containing the name of the host operating system.
+  """
+  os_name = repository_ctx.os.name.lower()
+  if os_name.startswith("mac os"):
+    return "Darwin"
+  if os_name.find("windows") != -1:
+    return "Windows"
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
+def _lib_name(lib, cpu_value, version="", static=False):
+  """Constructs the platform-specific name of a library.
+
+  Args:
+    lib: The name of the library, such as "hip"
+    cpu_value: The name of the host operating system.
+    version: The version of the library.
+    static: True the library is static or False if it is a shared object.
+
+  Returns:
+    The platform-specific name of the library. Configuration is aborted via
+    auto_configure_fail when cpu_value is not Linux, Windows or Darwin.
+  """
+  # Compare with ==, not `in ("Linux")`: the parentheses do not make a tuple,
+  # so `in` was a substring test that also accepted values like "" or "Lin".
+  if cpu_value == "Linux":
+    if static:
+      return "lib%s.a" % lib
+    else:
+      if version:
+        version = ".%s" % version
+      return "lib%s.so%s" % (lib, version)
+  elif cpu_value == "Windows":
+    return "%s.lib" % lib
+  elif cpu_value == "Darwin":
+    if static:
+      return "lib%s.a" % lib
+    elif version:
+      version = ".%s" % version
+    return "lib%s%s.dylib" % (lib, version)
+  else:
+    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _find_rocm_lib(lib, repository_ctx, cpu_value, basedir, version="",
+                   static=False):
+  """Finds the given ROCm libraries on the system.
+
+  Args:
+    lib: The name of the library, such as "hip"
+    repository_ctx: The repository context.
+    cpu_value: The name of the host operating system.
+    basedir: The install directory of ROCm.
+    version: The version of the library.
+    static: True if static library, False if shared object.
+
+  Returns:
+    Returns a struct with the following fields:
+      file_name: The basename of the library found on the system.
+      path: The full path to the library.
+ """ + file_name = _lib_name(lib, cpu_value, version, static) + if cpu_value == "Linux": + path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path( + "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + path = repository_ctx.path("%s/lib/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path("%s/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + auto_configure_fail("Cannot find rocm library %s" % file_name) + +def _find_libs(repository_ctx, rocm_config): + """Returns the ROCm libraries on the system. + + Args: + repository_ctx: The repository context. + rocm_config: The ROCm config as returned by _get_rocm_config + + Returns: + Map of library names to structs of filename and path as returned by + _find_rocm_lib. + """ + cpu_value = rocm_config.cpu_value + return { + "hip": _find_rocm_lib( + "hip_hcc", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path), + "rocblas": _find_rocm_lib( + "rocblas", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocblas"), + "rocfft": _find_rocm_lib( + "rocfft", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocfft"), + "hiprand": _find_rocm_lib( + "hiprand", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/hiprand"), + "miopen": _find_rocm_lib( + "MIOpen", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/miopen"), + } + +def _get_rocm_config(repository_ctx): + """Detects and returns information about the ROCm installation on the system. + + Args: + repository_ctx: The repository context. 
+ + Returns: + A struct containing the following fields: + rocm_toolkit_path: The ROCm toolkit installation directory. + amdgpu_targets: A list of the system's AMDGPU targets. + cpu_value: The name of the host operating system. + """ + cpu_value = _cpu_value(repository_ctx) + rocm_toolkit_path = _rocm_toolkit_path(repository_ctx) + return struct( + rocm_toolkit_path = rocm_toolkit_path, + amdgpu_targets = _amdgpu_targets(repository_ctx), + cpu_value = cpu_value) + +def _tpl(repository_ctx, tpl, substitutions={}, out=None): + if not out: + out = tpl.replace(":", "/") + repository_ctx.template( + out, + Label("//third_party/gpus/%s.tpl" % tpl), + substitutions) + + +def _file(repository_ctx, label): + repository_ctx.template( + label.replace(":", "/"), + Label("//third_party/gpus/%s.tpl" % label), + {}) + + +_DUMMY_CROSSTOOL_BZL_FILE = """ +def error_gpu_disabled(): + fail("ERROR: Building with --config=rocm but TensorFlow is not configured " + + "to build with GPU support. Please re-run ./configure and enter 'Y' " + + "at the prompt to build with GPU support.") + + native.genrule( + name = "error_gen_crosstool", + outs = ["CROSSTOOL"], + cmd = "echo 'Should not be run.' && exit 1", + ) + + native.filegroup( + name = "crosstool", + srcs = [":CROSSTOOL"], + output_licenses = ["unencumbered"], + ) +""" + + +_DUMMY_CROSSTOOL_BUILD_FILE = """ +load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled") + +error_gpu_disabled() +""" + +def _create_dummy_repository(repository_ctx): + cpu_value = _cpu_value(repository_ctx) + + # Set up BUILD file for rocm/. 
+ _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "False", + "%{rocm_extra_copts}": "[]" + }) + _tpl(repository_ctx, "rocm:BUILD", + { + "%{hip_lib}": _lib_name("hip", cpu_value), + "%{rocblas_lib}": _lib_name("rocblas", cpu_value), + "%{miopen_lib}": _lib_name("miopen", cpu_value), + "%{rocfft_lib}": _lib_name("rocfft", cpu_value), + "%{hiprand_lib}": _lib_name("hiprand", cpu_value), + "%{rocm_include_genrules}": '', + "%{rocm_headers}": '', + }) + + # Create dummy files for the ROCm toolkit since they are still required by + # tensorflow/core/platform/default/build_config:rocm. + repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "") + + # Set up rocm_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl(repository_ctx, "rocm:rocm_config.h", + { + "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH, + }, "rocm/rocm/rocm_config.h") + + # If rocm_configure is not configured to build with GPU support, and the user + # attempts to build with --config=rocm, add a dummy build rule to intercept + # this and fail with an actionable error message. + repository_ctx.file("crosstool/error_gpu_disabled.bzl", + _DUMMY_CROSSTOOL_BZL_FILE) + repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) + +def _execute(repository_ctx, cmdline, error_msg=None, error_details=None, + empty_stdout_fine=False): + """Executes an arbitrary shell command. 
+ + Args: + repository_ctx: the repository_ctx object + cmdline: list of strings, the command to execute + error_msg: string, a summary of the error if the command fails + error_details: string, details about the error or steps to fix it + empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise + it's an error + Return: + the result of repository_ctx.execute(cmdline) + """ + result = repository_ctx.execute(cmdline) + if result.stderr or not (empty_stdout_fine or result.stdout): + auto_configure_fail( + "\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else ""])) + return result + +def _norm_path(path): + """Returns a path with '/' and remove the trailing slash.""" + path = path.replace("\\", "/") + if path[-1] == "/": + path = path[:-1] + return path + +def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, + src_files = [], dest_files = []): + """Returns a genrule to symlink(or copy if on Windows) a set of files. + + If src_dir is passed, files will be read from the given directory; otherwise + we assume files are in src_files and dest_files + """ + if src_dir != None: + src_dir = _norm_path(src_dir) + dest_dir = _norm_path(dest_dir) + files = _read_dir(repository_ctx, src_dir) + # Create a list with the src_dir stripped to use for outputs. + dest_files = files.replace(src_dir, '').splitlines() + src_files = files.splitlines() + command = [] + # We clear folders that might have been generated previously to avoid + # undesired inclusions + command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi') + command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi') + outs = [] + for i in range(len(dest_files)): + if dest_files[i] != "": + # If we have only one file to link we do not want to use the dest_dir, as + # $(@D) will include the full path to the file. 
+ dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i] + # On Windows, symlink is not supported, so we just copy all the files. + cmd = 'ln -s' + command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest)) + outs.append(' "' + dest_dir + dest_files[i] + '",') + genrule = _genrule(src_dir, genrule_name, " && ".join(command), + "\n".join(outs)) + return genrule + +def _genrule(src_dir, genrule_name, command, outs): + """Returns a string with a genrule. + + Genrule executes the given command and produces the given outputs. + """ + return ( + 'genrule(\n' + + ' name = "' + + genrule_name + '",\n' + + ' outs = [\n' + + outs + + '\n ],\n' + + ' cmd = """\n' + + command + + '\n """,\n' + + ')\n' + ) + +def _read_dir(repository_ctx, src_dir): + """Returns a string with all files in a directory. + + Finds all files inside a directory, traversing subfolders and following + symlinks. The returned string contains the full path of all files + separated by line breaks. + """ + find_result = _execute( + repository_ctx, ["find", src_dir, "-follow", "-type", "f"], + empty_stdout_fine=True) + result = find_result.stdout + return result + +def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets): + if False: + amdgpu_target_flags = ["--amdgpu-target=" + + amdgpu_target for amdgpu_target in amdgpu_targets] + else: + # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc" + amdgpu_target_flags = [] + return str(amdgpu_target_flags) + +def _create_local_rocm_repository(repository_ctx): + """Creates the repository containing files set up to build with ROCm.""" + rocm_config = _get_rocm_config(repository_ctx) + + # Set up symbolic links for the rocm toolkit by creating genrules to do + # symlinking. 
We create one genrule for each directory we want to track under + # rocm_toolkit_path + rocm_toolkit_path = rocm_config.rocm_toolkit_path + rocm_include_path = rocm_toolkit_path + "/include" + genrules = [_symlink_genrule_for_dir(repository_ctx, + rocm_include_path, "rocm/include", "rocm-include")] + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/rocfft/include", "rocm/include/rocfft", "rocfft-include")) + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/rocblas/include", "rocm/include/rocblas", "rocblas-include")) + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/miopen/include", "rocm/include/miopen", "miopen-include")) + + rocm_libs = _find_libs(repository_ctx, rocm_config) + rocm_lib_src = [] + rocm_lib_dest = [] + for lib in rocm_libs.values(): + rocm_lib_src.append(lib.path) + rocm_lib_dest.append("rocm/lib/" + lib.file_name) + genrules.append(_symlink_genrule_for_dir(repository_ctx, None, "", "rocm-lib", + rocm_lib_src, rocm_lib_dest)) + + included_files = _read_dir(repository_ctx, rocm_include_path).replace( + rocm_include_path, '').splitlines() + + # Set up BUILD file for rocm/ + _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "True", + "%{rocm_extra_copts}": _compute_rocm_extra_copts( + repository_ctx, rocm_config.amdgpu_targets), + + }) + _tpl(repository_ctx, "rocm:BUILD", + { + "%{hip_lib}": rocm_libs["hip"].file_name, + "%{rocblas_lib}": rocm_libs["rocblas"].file_name, + "%{rocfft_lib}": rocm_libs["rocfft"].file_name, + "%{hiprand_lib}": rocm_libs["hiprand"].file_name, + "%{miopen_lib}": rocm_libs["miopen"].file_name, + "%{rocm_include_genrules}": "\n".join(genrules), + "%{rocm_headers}": ('":rocm-include",\n' + + '":rocfft-include",\n' + + '":rocblas-include",\n' + + '":miopen-include",'), + }) + # Set up crosstool/ + _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"}) + cc = 
find_cc(repository_ctx) + host_compiler_includes = _host_compiler_includes(repository_ctx, cc) + rocm_defines = { + "%{rocm_include_path}": _rocm_include_path(repository_ctx, + rocm_config), + "%{host_compiler_includes}": host_compiler_includes, + "%{clang_path}": str(cc), + } + + _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out="crosstool/CROSSTOOL") + + _tpl(repository_ctx, + "crosstool:clang/bin/crosstool_wrapper_driver_rocm", + { + "%{cpu_compiler}": str(cc), + "%{hipcc_path}": "/opt/rocm/bin/hipcc", + "%{gcc_host_compiler_path}": str(cc), + "%{rocm_amdgpu_targets}": ",".join( + ["\"%s\"" % c for c in rocm_config.amdgpu_targets]), + }) + + # Set up rocm_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl(repository_ctx, "rocm:rocm_config.h", + { + "%{rocm_amdgpu_targets}": ",".join( + ["\"%s\"" % c for c in rocm_config.amdgpu_targets]), + "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path, + }, "rocm/rocm/rocm_config.h") + + +def _create_remote_rocm_repository(repository_ctx, remote_config_repo): + """Creates pointers to a remotely configured repo set up to build with ROCm.""" + _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "True", + "%{rocm_extra_copts}": _compute_rocm_extra_copts( + repository_ctx, #_compute_capabilities(repository_ctx) + ), + + }) + _tpl(repository_ctx, "rocm:remote.BUILD", + { + "%{remote_rocm_repo}": remote_config_repo, + }, "rocm/BUILD") + _tpl(repository_ctx, "crosstool:remote.BUILD", { + "%{remote_rocm_repo}": remote_config_repo, + }, "crosstool/BUILD") + +def _rocm_autoconf_impl(repository_ctx): + """Implementation of the rocm_autoconf repository rule.""" + if not _enable_rocm(repository_ctx): + _create_dummy_repository(repository_ctx) + else: + if _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ: + _create_remote_rocm_repository(repository_ctx, + repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO]) + else: + _create_local_rocm_repository(repository_ctx) + + 
+rocm_configure = repository_rule( + implementation = _rocm_autoconf_impl, + environ = [ + _GCC_HOST_COMPILER_PATH, + "TF_NEED_ROCM", + _ROCM_TOOLKIT_PATH, + _TF_ROCM_VERSION, + _TF_MIOPEN_VERSION, + _TF_ROCM_AMDGPU_TARGETS, + _TF_ROCM_CONFIG_REPO, + ], +) + +"""Detects and configures the local ROCm toolchain. + +Add the following to your WORKSPACE FILE: + +```python +rocm_configure(name = "local_config_rocm") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/tools/bazel.rc b/tools/bazel.rc index 601e07ffdd..afc5cf56ab 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -42,6 +42,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld' build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true +build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain +build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true + build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true -- GitLab From 18b80bbd4b8db8bd35afad7264258c1c5c269226 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Wed, 5 Sep 2018 22:56:20 -0700 Subject: [PATCH 0082/1357] Updated with more unit tests --- third_party/ngraph/ngraph.BUILD | 4 ++-- third_party/ngraph/ngraph_tf.BUILD | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD index 426d49c542..1fd1b8e8e0 100644 --- a/third_party/ngraph/ngraph.BUILD +++ b/third_party/ngraph/ngraph.BUILD @@ -101,7 +101,7 @@ cc_library( "-I external/ngraph/src", "-I external/nlohmann_json_lib/include/", '-D SHARED_LIB_EXT=\\".so\\"', - '-D NGRAPH_VERSION=\\"0.5.0\\"', + '-D NGRAPH_VERSION=\\"0.7.0\\"', "-D NGRAPH_DEX_ONLY", ], visibility = ["//visibility:public"], @@ -135,7 +135,7 @@ cc_library( "-I external/ngraph/src", "-I 
external/nlohmann_json_lib/include/", '-D SHARED_LIB_EXT=\\".so\\"', - '-D NGRAPH_VERSION=\\"0.5.0\\"', + '-D NGRAPH_VERSION=\\"0.7.0\\"', ], visibility = ["//visibility:public"], alwayslink = 1, diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index 7577a4014d..979318d7c2 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -70,6 +70,10 @@ tf_cc_test( "test/graph_rewrites/assign_clusters.cc", "test/test_utilities.h", "test/test_utilities.cpp", + "test/test_math_ops.cpp", + "test/test_nn_ops.cpp", + "test/opexecuter.h", + "test/opexecuter.cpp", "test/main.cpp", ], deps = [ -- GitLab From d0574f6b25ab01052e093ab92612520a7e4ada8d Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Thu, 6 Sep 2018 08:22:37 -0700 Subject: [PATCH 0083/1357] Fixed clang formatting --- .../stream_executor/cuda/cuda_gpu_executor.cc | 17 +++++++++-------- .../stream_executor/cuda/cuda_gpu_executor.h | 12 ++++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index ce2f1ce3ae..ef84d01a94 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // Compute and return maximum blocks per core (occupancy) based on the // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. 
-int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { +int CUDAExecutor::CalculateOccupancy( + const DeviceDescription& device_description, uint64 registers_per_thread, + uint64 shared_memory_per_block, const ThreadDim& thread_dims, + CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; CUresult err = @@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. int CUDAExecutor::CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, + CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; CUresult err = diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index e8ebbc3220..1481dcc19a 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface { const KernelArgsArrayBase &args) override; int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + + int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, const ThreadDim& thread_dims, CUfunction func); - int CompareOccupancy(int* initial_blocks, 
- const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); - void *Allocate(uint64 size) override; void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes, -- GitLab From e3654a3cb4e26c26409aeeb9e127e3addcb14cee Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 6 Sep 2018 19:20:11 +0000 Subject: [PATCH 0084/1357] Add float16 support on GPU for tf.contrib.image.transform This fix tries to address the issue raised in 22115 where there were no float16 support on GPU for tf.contrib.image.transform. This fix fixes 22115. Signed-off-by: Yong Tang --- tensorflow/contrib/image/kernels/image_ops.cc | 2 ++ tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc | 1 + 2 files changed, 3 insertions(+) diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc index 370a8caf6a..788bf04b28 100644 --- a/tensorflow/contrib/image/kernels/image_ops.cc +++ b/tensorflow/contrib/image/kernels/image_ops.cc @@ -156,6 +156,7 @@ namespace functor { TF_CALL_uint8(DECLARE_FUNCTOR); TF_CALL_int32(DECLARE_FUNCTOR); TF_CALL_int64(DECLARE_FUNCTOR); +TF_CALL_half(DECLARE_FUNCTOR); TF_CALL_float(DECLARE_FUNCTOR); TF_CALL_double(DECLARE_FUNCTOR); @@ -175,6 +176,7 @@ TF_CALL_double(DECLARE_FUNCTOR); TF_CALL_uint8(REGISTER); TF_CALL_int32(REGISTER); TF_CALL_int64(REGISTER); +TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); diff --git a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc index 8743a5ff72..36b9a236a6 100644 --- a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc +++ b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc @@ -32,6 +32,7 @@ typedef Eigen::GpuDevice GPUDevice; template class FillProjectiveTransform; template class FillProjectiveTransform; template class FillProjectiveTransform; +template class FillProjectiveTransform; template 
class FillProjectiveTransform; template class FillProjectiveTransform; -- GitLab From 7d7e8a725aeede4b724f7376d22df2c7f2ebdcf9 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 6 Sep 2018 19:22:39 +0000 Subject: [PATCH 0085/1357] Add test case for float16 support on GPU for tf.contrib.image.transform Signed-off-by: Yong Tang --- .../contrib/image/python/kernel_tests/image_ops_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index 376c0751ee..ef1f79bb94 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -272,6 +272,13 @@ class ImageOpsTest(test_util.TensorFlowTestCase): with self.cached_session(): self.assertAllEqual([[[[1], [0]], [[0], [1]]]], result.eval()) + def test_transform_data_types(self): + for dtype in _DTYPES: + image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) + value = image_ops.transform(image, [1] * 8) + with self.test_session(use_gpu=True): + self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) + class BipartiteMatchTest(test_util.TensorFlowTestCase): -- GitLab From 04e20965487c36f43ba5c773b547b23e39478a5c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 6 Sep 2018 19:25:22 +0000 Subject: [PATCH 0086/1357] Pylint fix Signed-off-by: Yong Tang --- .../contrib/image/python/kernel_tests/image_ops_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index ef1f79bb94..4997c31a7f 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -277,7 +277,9 @@ class ImageOpsTest(test_util.TensorFlowTestCase): image = 
constant_op.constant([[1, 2], [3, 4]], dtype=dtype) value = image_ops.transform(image, [1] * 8) with self.test_session(use_gpu=True): - self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) + self.assertAllEqual( + value.eval(), + np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) class BipartiteMatchTest(test_util.TensorFlowTestCase): -- GitLab From 6a5090b086bc9d665eb9e65f05eb94cdb58baaa2 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Thu, 6 Sep 2018 13:09:12 -0700 Subject: [PATCH 0087/1357] Fully fixed clang errors --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 12 ++++++------ tensorflow/stream_executor/cuda/cuda_gpu_executor.h | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index ef84d01a94..9d5bcc7f77 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -472,7 +472,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, const DeviceDescription &device_description = kernel.parent()->GetDeviceDescription(); - const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel); + const CUDAKernel *cuda_kernel = AsCUDAKernel(&kernel); CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue(); int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread, @@ -494,8 +494,8 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. 
int CUDAExecutor::CalculateOccupancy( - const DeviceDescription& device_description, uint64 registers_per_thread, - uint64 shared_memory_per_block, const ThreadDim& thread_dims, + const DeviceDescription &device_description, uint64 registers_per_thread, + uint64 shared_memory_per_block, const ThreadDim &thread_dims, CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; @@ -508,11 +508,11 @@ int CUDAExecutor::CalculateOccupancy( // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. -int CUDAExecutor::CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, +int CUDAExecutor::CompareOccupancy(int *initial_blocks, + const DeviceDescription &device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, - const ThreadDim& thread_dims, + const ThreadDim &thread_dims, CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index 1481dcc19a..53b2a29ae7 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -70,16 +70,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface { const BlockDim &block_dims, const KernelBase &k, const KernelArgsArrayBase &args) override; - int CalculateOccupancy(const DeviceDescription& device_description, + int CalculateOccupancy(const DeviceDescription &device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); + const ThreadDim &thread_dims, CUfunction func); - int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, + int CompareOccupancy(int *initial_blocks, + const DeviceDescription &device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, - 
const ThreadDim& thread_dims, CUfunction func); + const ThreadDim &thread_dims, CUfunction func); void *Allocate(uint64 size) override; -- GitLab From a0da587dddb7ec2bd703e15882b68085cfd7933e Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Fri, 7 Sep 2018 06:48:27 +0900 Subject: [PATCH 0088/1357] fix documentation errors --- .../api_def_ExtractVolumePatches.pbtxt | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt index 3499ade368..3c8a455983 100644 --- a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt @@ -1,32 +1,32 @@ op { graph_op_name: "ExtractVolumePatches" in_arg { - name: "images" + name: "input" description: < Date: Fri, 7 Sep 2018 08:16:48 +0900 Subject: [PATCH 0089/1357] fix argument name --- tensorflow/core/ops/array_ops.cc | 2 +- tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 2 +- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 6c8369200a..44908fe875 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2553,7 +2553,7 @@ REGISTER_OP("ExtractImagePatches") // as the second parameter of all GetWindowedOutputSizeVerbose calls instead // of ksize_*. 
REGISTER_OP("ExtractVolumePatches") - .Input("images: T") + .Input("input: T") .Output("patches: T") .Attr("ksizes: list(int) >= 5") .Attr("strides: list(int) >= 5") diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index ba928eba9e..eafcc208cc 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1062,7 +1062,7 @@ tf_module { } member_method { name: "extract_volume_patches" - argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'input\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "eye" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index f7e63978da..cd06ee5763 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -1062,7 +1062,7 @@ tf_module { } member_method { name: "extract_volume_patches" - argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'input\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "eye" -- GitLab From e25cf78285fef5234380ee26fef9090a939e91f5 Mon Sep 17 00:00:00 2001 From: Richard Yu Date: Thu, 6 Sep 2018 17:05:08 -0700 Subject: [PATCH 0090/1357] Ensure all ValueErrors are raised --- tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +- tensorflow/python/keras/layers/embeddings.py | 8 ++++---- tensorflow/python/ops/nn_ops.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index d9f179bee4..d882b79892 100644 
--- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling): bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var, context) if batch_mean_tensor is None and moving_mean_tensor is None: - ValueError('Error folding unfused batch norms') + raise ValueError('Error folding unfused batch norms') if has_scaling: gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context) diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py index 629a9ec9a1..a0b9393812 100644 --- a/tensorflow/python/keras/layers/embeddings.py +++ b/tensorflow/python/keras/layers/embeddings.py @@ -142,13 +142,13 @@ class Embedding(Layer): else: in_lens = [self.input_length] if len(in_lens) != len(input_shape) - 1: - ValueError('"input_length" is %s, but received input has shape %s' % - (str(self.input_length), str(input_shape))) + raise ValueError('"input_length" is %s, but received input has shape %s' % + (str(self.input_length), str(input_shape))) else: for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])): if s1 is not None and s2 is not None and s1 != s2: - ValueError('"input_length" is %s, but received input has shape %s' % - (str(self.input_length), str(input_shape))) + raise ValueError('"input_length" is %s, but received input has shape %s' % + (str(self.input_length), str(input_shape))) elif s1 is None: in_lens[i] = s2 return (input_shape[0],) + tuple(in_lens) + (self.output_dim,) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index ef9afd9e8e..17e10995f2 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -427,8 +427,8 @@ class _WithSpaceToBatch(object): try: input_shape.with_rank_at_least(expected_input_rank) except ValueError: - ValueError("input tensor must have rank %d at least" % - (expected_input_rank)) + raise 
ValueError("input tensor must have rank %d at least" % + (expected_input_rank)) const_rate = tensor_util.constant_value(dilation_rate) rate_or_const_rate = dilation_rate @@ -818,12 +818,12 @@ class Convolution(object): try: input_shape.with_rank(num_spatial_dims + 2) except ValueError: - ValueError("input tensor must have rank %d" % (num_spatial_dims + 2)) + raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2)) try: filter_shape.with_rank(num_spatial_dims + 2) except ValueError: - ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) + raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) if data_format is None or not data_format.startswith("NC"): input_channels_dim = input_shape[num_spatial_dims + 1] -- GitLab From f5eb30c29d5d34145252e49ac3f9bda067abafe8 Mon Sep 17 00:00:00 2001 From: Smokrow Date: Fri, 7 Sep 2018 09:26:44 +0200 Subject: [PATCH 0091/1357] edited flat_map description and removed typo The examples in interleave are quite helpful. I just added a reference to this example --- tensorflow/python/data/ops/dataset_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 2c1aa22116..8242c7309d 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1007,7 +1007,7 @@ class Dataset(object): return ParallelMapDataset(self, map_func, num_parallel_calls) def flat_map(self, map_func): - """Maps `map_func` across this dataset and flattens the result. + """Maps `map_func` across this dataset and flattens the result. Will produce identical results to 'tf.data.Dataset.interleave' Args: map_func: A function mapping a nested structure of tensors (having shapes @@ -1043,7 +1043,7 @@ class Dataset(object): elements are produced. `cycle_length` controls the number of input elements that are processed concurrently. 
If you set `cycle_length` to 1, this transformation will handle one input element at a time, and will produce - identical results = to `tf.data.Dataset.flat_map`. In general, + identical results to `tf.data.Dataset.flat_map`. In general, this transformation will apply `map_func` to `cycle_length` input elements, open iterators on the returned `Dataset` objects, and cycle through them producing `block_length` consecutive elements from each iterator, and -- GitLab From a11cb4cb1500f35266667d9f72b0a0534f2d1581 Mon Sep 17 00:00:00 2001 From: BY Shen Date: Fri, 7 Sep 2018 22:20:37 +0800 Subject: [PATCH 0092/1357] Fix a bug in TF_LITE_ENSURE_OK. --- tensorflow/contrib/lite/context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index b23183b743..58977b5c47 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -148,7 +148,7 @@ void TfLiteIntArrayFree(TfLiteIntArray* v); #define TF_LITE_ENSURE_OK(context, status) \ do { \ if ((status) != kTfLiteOk) { \ - return status; \ + return kTfLiteError; \ } \ } while (0) -- GitLab From 3445242ac138d4d5aa9b346e17cd47ebf23770a5 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 7 Sep 2018 23:39:53 +0000 Subject: [PATCH 0093/1357] Fix int64 failure on GPU for TensorArray This fix tries to address the issue raised in 22054 where int64 on GPU results in colocation errors. This fix enables int64 on GPU with TensorArray. This fix fixes 22054. 
Signed-off-by: Yong Tang --- tensorflow/core/kernels/tensor_array_ops.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index 2ec2651c04..82a7735c6d 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -259,6 +259,7 @@ REGISTER_KERNEL_BUILDER(Name("TensorArrayV3").Device(DEVICE_CPU), TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -487,6 +488,7 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -576,6 +578,7 @@ TF_CALL_ALL_TYPES(REGISTER_READ) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -777,6 +780,7 @@ REGISTER_GATHER_AND_PACK(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -998,6 +1002,7 @@ REGISTER_CONCAT(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -1218,6 +1223,7 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_AND_UNPACK); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA @@ -1388,6 +1394,7 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); 
#undef REGISTER_GPU #endif // GOOGLE_CUDA -- GitLab From 81677d2f20664c7f76598c20f2a01d62465999b4 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 7 Sep 2018 23:42:20 +0000 Subject: [PATCH 0094/1357] Add needed specifications for Split on GPU. Signed-off-by: Yong Tang --- tensorflow/core/kernels/split_lib_gpu.cu.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index 393818730b..8623e47e41 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -54,6 +54,7 @@ void SplitCustom::operator()( TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); TF_CALL_complex128(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); TF_CALL_bfloat16(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS @@ -245,6 +246,7 @@ struct SplitVOpGPULaunch { TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); +TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #define REGISTER_GPU_KERNEL(T) \ @@ -254,6 +256,7 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); +TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL -- GitLab From bd1fd82712706592b9a6d34a6bac1b1f438eb00f Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 7 Sep 2018 19:16:04 -0700 Subject: [PATCH 0095/1357] Updated the ngraph-tf and ngraph releases. 
--- WORKSPACE | 12 ------------ tensorflow/workspace.bzl | 40 ++++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 15aa24f3c1..f1d0ed565d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -80,15 +80,3 @@ new_http_archive( ], ) -new_local_repository( - name = "ngraph", - path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph", - build_file = "//third_party/ngraph:ngraph.BUILD", -) - -new_local_repository( - name = "ngraph_tf", - path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph-tf", - build_file = "//third_party/ngraph:ngraph_tf.BUILD", -) - diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0ff695d9f8..79b3df1e51 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -841,16 +841,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party/ngraph:tbb.BUILD"), ) - # tf_http_archive( - # name = "ngraph", - # urls = [ - # "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - # "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - # ], - # sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", - # strip_prefix = "ngraph-0.5.0", - # build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), - # ) + tf_http_archive( + name = "ngraph", + urls = [ + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", + "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", + ], + sha256 = "", + strip_prefix = "ngraph-0.7.0", + build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), + ) tf_http_archive( name = "nlohmann_json_lib", @@ -863,16 +863,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), ) - # tf_http_archive( - # name = "ngraph_tf", - # urls = [ - # 
"https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - # "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - # ], - # sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", - # strip_prefix = "ngraph-tf-0.3.0-rc1", - # build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), - # ) + tf_http_archive( + name = "ngraph_tf", + urls = [ + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz", + "https://github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz", + ], + sha256 = "23b4566d8e40d6f1f236b0ffe3905dd964ae42ca54bacff67f24abcefd443afb", + strip_prefix = "ngraph-tf-0.5.0", + build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), + ) ############################################################################## # BIND DEFINITIONS -- GitLab From 47df1ccb1837382a526439b38cd1259fca5d074b Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 7 Sep 2018 19:18:02 -0700 Subject: [PATCH 0096/1357] Removed empty newline --- WORKSPACE | 1 - 1 file changed, 1 deletion(-) diff --git a/WORKSPACE b/WORKSPACE index f1d0ed565d..17961829a6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -79,4 +79,3 @@ new_http_archive( "http://download.tensorflow.org/models/speech_commands_v0.01.zip", ], ) - -- GitLab From 2032512ba1de376baadfa9f3983e3edbc67a6731 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 7 Sep 2018 19:21:19 -0700 Subject: [PATCH 0097/1357] Updated the sha256 for ngraph --- tensorflow/workspace.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 79b3df1e51..9a82c724b7 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -847,7 +847,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", ], - 
sha256 = "", + sha256 = "34434b6d5993ac5233538c84f498840db7ac91df82e225c379ee7c8f6de644a5", strip_prefix = "ngraph-0.7.0", build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), ) -- GitLab From 864e290d1776895d7877777b8368ca8bc6fc22a3 Mon Sep 17 00:00:00 2001 From: Edvard Fagerholm Date: Wed, 29 Aug 2018 11:56:35 +0300 Subject: [PATCH 0098/1357] Make tf.transpose emit simpler graph when possible If not given an explicit 'perm' parameter, tf.transpose currently emits a graph that dynamically calculates it from the rank of the input tensor. This is completely unnecessary when the rank of the input can be statically determined at graph construction time. Modify tf.transpose to emit 'perm' as a single Const node whenever possible. --- tensorflow/python/ops/array_ops.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 7bf3869ddf..9597839301 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1409,8 +1409,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False): gen_array_ops.conjugate_transpose if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose) if perm is None: - rank = gen_array_ops.rank(a) - perm = (rank - 1) - gen_math_ops._range(0, rank, 1) + a = ops.convert_to_tensor(a, name="a") + if not a.get_shape().ndims: + rank = gen_array_ops.rank(a) + perm = (rank - 1) - gen_math_ops._range(0, rank, 1) + else: + rank = a.get_shape().ndims + perm = (rank - 1) - np.arange(rank) ret = transpose_fn(a, perm, name=name) # NOTE(mrry): Setting the shape explicitly because # reverse is not handled by the shape function. 
-- GitLab From ea0d499693c4609a8be55add3163971f93b8f2be Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 10 Sep 2018 01:41:54 +0000 Subject: [PATCH 0099/1357] Fix python 3 GPU test failures Signed-off-by: Yong Tang --- tensorflow/core/kernels/split_lib_gpu.cu.cc | 2 -- tensorflow/core/kernels/tensor_array_ops.cc | 4 ---- 2 files changed, 6 deletions(-) diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index 8623e47e41..a4a59dbcbc 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -246,7 +246,6 @@ struct SplitVOpGPULaunch { TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); -TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #define REGISTER_GPU_KERNEL(T) \ @@ -256,7 +255,6 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); -TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index 82a7735c6d..58f1a36a90 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -488,7 +488,6 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -780,7 +779,6 @@ REGISTER_GATHER_AND_PACK(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -1002,7 +1000,6 @@ REGISTER_CONCAT(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); 
TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -1394,7 +1391,6 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA -- GitLab From 90cf7fb7786c8a9c135ef73482856b082e80f61a Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Tue, 11 Sep 2018 12:48:30 +0800 Subject: [PATCH 0100/1357] Fix lint errors and typos. --- tensorflow/compiler/tests/binary_ops_test.py | 9 +++++---- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 14 +++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 8941dd4e27..069e83d083 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -179,11 +179,12 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._leaky_relu_grad, + gen_nn_ops.leaky_relu_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), - np.array( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), - expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype)) + np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + dtype=dtype), + expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], + dtype=dtype)) self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index ec14735884..8d65e0339c 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,7 +50,6 @@ class Relu6Op : public XlaOpKernel { } }; - class LeakyReluOp : public XlaOpKernel { 
public: explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { @@ -61,9 +60,9 @@ class LeakyReluOp : public XlaOpKernel { xla::XlaBuilder* builder = ctx->builder(); auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), static_cast(alpha_)); - ctx->SetOutput(0, - xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); + ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); } + private: float alpha_; }; @@ -115,11 +114,12 @@ class LeakyReluGradOp : public XlaOpKernel { const auto zero = xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); const auto pred = xla::Gt(ctx->Input(1), zero); - auto alpha = XlaHelpers::FloatLiteral(b, input_type(0), - static_cast(alpha_)); - ctx->SetOutput(0, - xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); + auto alpha = + XlaHelpers::FloatLiteral(b, input_type(0), static_cast(alpha_)); + ctx->SetOutput( + 0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); } + private: float alpha_; }; -- GitLab From c807662d69dd1ca8bda7c34a642b812b38a4720b Mon Sep 17 00:00:00 2001 From: Smokrow Date: Tue, 11 Sep 2018 10:35:27 +0200 Subject: [PATCH 0101/1357] added example for flat_map --- tensorflow/python/data/ops/dataset_ops.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 8242c7309d..14a1e3d803 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1007,8 +1007,20 @@ class Dataset(object): return ParallelMapDataset(self, map_func, num_parallel_calls) def flat_map(self, map_func): - """Maps `map_func` across this dataset and flattens the result. Will produce identical results to 'tf.data.Dataset.interleave' + """Maps `map_func` across this dataset and flattens the result. + + Will produce similar results to `tf.data.Dataset.interleave(cycle_length=1)`. 
+ Use `flat_map` if you want to make sure, that the order of your dataset stays the same. + For example: + ```python + # NOTE: The following examples use `{ ... }` to represent the + # contents of a dataset. '[...]' represents a tensor. + a = {[1,2,3,4,5], [6,7,8,9], [10]} + + a.flat_map(lambda x: Dataset.from_tensors(x)) == + {[1,2,3,4,5,6,7,8,9,10]} + ``` Args: map_func: A function mapping a nested structure of tensors (having shapes and types defined by `self.output_shapes` and `self.output_types`) to a -- GitLab From 8530167f68673fa756565c0394bbe2dcdc39db05 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Fri, 24 Aug 2018 16:52:07 +0300 Subject: [PATCH 0102/1357] Add IgniteDataset that allows to work with Apache Ignite. --- configure.py | 2 + tensorflow/BUILD | 6 + tensorflow/contrib/BUILD | 15 + tensorflow/contrib/cmake/python_modules.txt | 2 + tensorflow/contrib/ignite/BUILD | 136 ++++ tensorflow/contrib/ignite/README.md | 167 ++++ tensorflow/contrib/ignite/__init__.py | 42 + .../kernels/ignite_binary_object_parser.cc | 304 +++++++ .../kernels/ignite_binary_object_parser.h | 54 ++ .../contrib/ignite/kernels/ignite_client.cc | 55 ++ .../contrib/ignite/kernels/ignite_client.h | 40 + .../contrib/ignite/kernels/ignite_dataset.cc | 123 +++ .../contrib/ignite/kernels/ignite_dataset.h | 65 ++ .../ignite/kernels/ignite_dataset_iterator.cc | 447 ++++++++++ .../ignite/kernels/ignite_dataset_iterator.h | 87 ++ .../ignite/kernels/ignite_dataset_ops.cc | 145 ++++ .../ignite/kernels/ignite_plain_client.h | 43 + .../kernels/ignite_plain_client_unix.cc | 132 +++ .../kernels/ignite_plain_client_windows.cc | 143 ++++ .../ignite/kernels/ignite_ssl_wrapper.cc | 149 ++++ .../ignite/kernels/ignite_ssl_wrapper.h | 49 ++ tensorflow/contrib/ignite/ops/dataset_ops.cc | 64 ++ .../ignite/python/ops/ignite_dataset_ops.py | 763 ++++++++++++++++++ .../ignite/python/ops/ignite_op_loader.py | 25 + .../ignite/python/tests/bin/start-plain.sh | 24 + 
.../ignite/python/tests/bin/start-ssl-auth.sh | 28 + .../ignite/python/tests/bin/start-ssl.sh | 26 + .../tests/config/ignite-config-plain.xml | 39 + .../tests/config/ignite-config-ssl-auth.xml | 59 ++ .../python/tests/config/ignite-config-ssl.xml | 59 ++ .../python/tests/ignite_dataset_test.py | 77 ++ .../ignite/python/tests/keystore/client.jks | Bin 0 -> 3232 bytes .../ignite/python/tests/keystore/client.pem | 69 ++ .../ignite/python/tests/keystore/server.jks | Bin 0 -> 3230 bytes .../ignite/python/tests/keystore/trust.jks | Bin 0 -> 2432 bytes .../contrib/ignite/python/tests/sql/init.sql | 20 + .../ignite/python/tests/start_ignite.sh | 30 + .../ignite/python/tests/stop_ignite.sh | 19 + 38 files changed, 3508 insertions(+) create mode 100644 tensorflow/contrib/ignite/BUILD create mode 100644 tensorflow/contrib/ignite/README.md create mode 100644 tensorflow/contrib/ignite/__init__.py create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h create mode 100644 
tensorflow/contrib/ignite/ops/dataset_ops.cc create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_op_loader.py create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-plain.sh create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml create mode 100644 tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.jks create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.pem create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/server.jks create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/trust.jks create mode 100644 tensorflow/contrib/ignite/python/tests/sql/init.sql create mode 100755 tensorflow/contrib/ignite/python/tests/start_ignite.sh create mode 100755 tensorflow/contrib/ignite/python/tests/stop_ignite.sh diff --git a/configure.py b/configure.py index 361bd4764d..8f1957e870 100644 --- a/configure.py +++ b/configure.py @@ -1502,6 +1502,8 @@ def main(): 'with_aws_support', True, 'aws') set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', 'with_kafka_support', True, 'kafka') + set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite', + 'with_ignite_support', True, 'ignite') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 386e0096ff..6c29c78793 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -248,6 
+248,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "with_ignite_support", + define_values = {"with_ignite_support": "true"}, + visibility = ["//visibility:public"], +) + # Crosses between platforms and file system libraries not supported on those # platforms due to limitations in nested select() statements. config_setting( diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 798f499870..f055e643d0 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -118,6 +118,11 @@ py_library( "//tensorflow/contrib/kafka", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite", + ], + "//conditions:default": [], }) + select({ "//tensorflow:with_aws_support_windows_override": [], "//tensorflow:with_aws_support": [ @@ -160,6 +165,11 @@ cc_library( "//tensorflow/contrib/kafka:dataset_kernels", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite:dataset_kernels", + ], + "//conditions:default": [], }) + select({ "//tensorflow:with_aws_support_windows_override": [], "//tensorflow:with_aws_support": [ @@ -197,6 +207,11 @@ cc_library( "//tensorflow/contrib/kafka:dataset_ops_op_lib", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite:dataset_ops_op_lib", + ], + "//conditions:default": [], }) + select({ "//tensorflow:with_aws_support_windows_override": [], "//tensorflow:with_aws_support": [ diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index fb871acae9..56755e817a 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -207,6 +207,8 @@ tensorflow/contrib/integrate/python tensorflow/contrib/integrate/python/ops tensorflow/contrib/kafka/python tensorflow/contrib/kafka/python/ops +tensorflow/contrib/ignite/python 
+tensorflow/contrib/ignite/python/ops tensorflow/contrib/keras tensorflow/contrib/keras/api tensorflow/contrib/keras/api/keras diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD new file mode 100644 index 0000000000..9f6c666893 --- /dev/null +++ b/tensorflow/contrib/ignite/BUILD @@ -0,0 +1,136 @@ +package(default_visibility = ["//tensorflow:internal"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load( + "//tensorflow:tensorflow.bzl", + "tf_gen_op_wrapper_py", + "tf_kernel_library", + "tf_custom_op_library", + "tf_custom_op_py_library", + "tf_gen_op_libs", + "tf_py_test", + "if_not_windows", + "if_windows", +) + +py_library( + name = "ignite", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_ops", + ], +) + +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = ["ops/dataset_ops.cc"], + deps = [":dataset_kernels"], +) + +tf_gen_op_libs( + op_lib_names = ["dataset_ops"], +) + +cc_library( + name = "dataset_kernels", + srcs = [ + "kernels/ignite_dataset_ops.cc", + "kernels/ignite_client.h", + "kernels/ignite_client.cc", + "kernels/ignite_plain_client.h", + "kernels/ignite_ssl_wrapper.h", + "kernels/ignite_ssl_wrapper.cc", + "kernels/ignite_binary_object_parser.h", + "kernels/ignite_binary_object_parser.cc", + "kernels/ignite_dataset.h", + "kernels/ignite_dataset.cc", + "kernels/ignite_dataset_iterator.h", + "kernels/ignite_dataset_iterator.cc", + ] + if_not_windows([ + "kernels/ignite_plain_client_unix.cc", + ]) + if_windows([ + "kernels/ignite_plain_client_windows.cc", + ]), + deps = [ + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + "@boringssl//:ssl", + "@protobuf_archive//:protobuf_headers", + ], + alwayslink = 1, +) + +py_library( + name = "dataset_ops", + srcs = [ + "python/ops/ignite_dataset_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":ignite_op_loader", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:util", + 
"//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_dataset_ops", + out = "python/ops/gen_dataset_ops.py", + deps = ["//tensorflow/contrib/ignite:dataset_ops_op_lib"], +) + +tf_kernel_library( + name = "dataset_ops_kernels", + deps = [ + ":dataset_kernels", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +tf_custom_op_py_library( + name = "ignite_op_loader", + srcs = ["python/ops/ignite_op_loader.py"], + dso = ["//tensorflow/contrib/ignite:_dataset_ops.so"], + kernels = [ + ":dataset_ops_kernels", + "//tensorflow/contrib/ignite:dataset_ops_op_lib", + ], + srcs_version = "PY2AND3", + deps = [ + ":gen_dataset_ops", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + ], +) + +# The Apache Ignite servers have to setup before the test and tear down +# after the test manually. The docker engine has to be installed. +# +# To setup Apache Ignite servers: +# $ bash ./python/tests/start_ignite.sh +# +# To tear down Apache Ignite servers: +# $ bash ./python/tests/stop_ignite.sh +tf_py_test( + name = "ignite_dataset_test", + srcs = ["python/tests/ignite_dataset_test.py"], + additional_deps = [ + ":ignite", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], + tags = [ + "manual", + "no_windows", + "notap", + ], +) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md new file mode 100644 index 0000000000..9054344e94 --- /dev/null +++ b/tensorflow/contrib/ignite/README.md @@ -0,0 +1,167 @@ +### Ignite Dataset +# Ignite Dataset + +- [Overview](#overview) +- [Features](#features) + * [Distributed In-Memory Datasource](#distributed-in-memory-datasource) + * [Structured Objects](#structured-objects) + * [Distributed Training](#distributed-training) + * [SSL Connection](#ssl-connection) + * [Windows Support](#windows-support) +- 
[Try it out](#try-it-out) +- [Limitations](#limitations) + +## Overview + +[Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for +transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. + +## Features + +Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below. + +### Distributed In-Memory Datasource +[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. +- If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations. +- If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations. 
+- If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow. + +It's important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, at the same time, an ability to use this data for neural network training and inference. + +```bash +$ apache-ignite-fabric/bin/ignite.sh +$ apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://localhost:10800/" + +jdbc:ignite:thin://localhost/> CREATE TABLE KITTEN_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (1, 'WARM KITTY'); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (2, 'SOFT KITTY'); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL OF FUR'); +``` + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="SQL_PUBLIC_KITTEN_CACHE") +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> for _ in range(3): +>>> print(sess.run(next_obj)) + +{'key': 1, 'val': {'NAME': b'WARM KITTY'}} +{'key': 2, 'val': {'NAME': b'SOFT KITTY'}} +{'key': 3, 'val': {'NAME': b'LITTLE BALL OF FUR'}} +``` + +### Structured Objects +[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects.
+ +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES") +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> print(sess.run(next_obj)) + +{ + 'key': 'kitten.png', + 'val': { + 'metadata': { + 'file_name': b'kitten.png', + 'label': b'little ball of fur', + width: 800, + height: 600 + }, + 'pixels': [0, 0, 0, 0, ..., 0] + } +} +``` + Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES").map(lambda obj: obj['val']['pixels']) +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> print(sess.run(next_obj)) + +[0, 0, 0, 0, ..., 0] +``` + +### Distributed Training + +TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. + + + +Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck. + +Apache Ignite uses horizontal partitioning to store data in distributed cluster. 
When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we create a cache with 10 partitions then every machine will maintain approximately one data partition. + +Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting corresponding environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset("IMAGES") +>>> +>>> # Compute gradients locally on every worker node. +>>> gradients = [] +>>> for i in range(5): +>>> with tf.device("/job:WORKER/task:%d" % i): +>>> device_iterator = dataset.make_one_shot_iterator() +>>> device_next_obj = device_iterator.get_next() +>>> gradient = compute_gradient(device_next_obj) +>>> gradients.append(gradient) +>>> +>>> # Aggregate them on master node. +>>> result_gradient = tf.reduce_sum(gradients) +>>> +>>> with tf.Session("grpc://localhost:10000") as sess: +>>> print(sess.run(result_gradient)) +``` + +High-level TensorFlow API for [distributed training](https://www.tensorflow.org/api_docs/python/tf/contrib/distribute/DistributionStrategy) is supported as well. + +### SSL Connection + +Your data should not be accessible without any control.
Apache Ignite allows protecting data transfer channels with [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentication. Ignite Dataset supports SSL connections both with and without authentication. For more information please see the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES", certfile="client.pem", cert_password="password", username="ignite", password="ignite") +>>> ... +``` + +### Windows Support + +Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. + +## Try it out + +The simplest way to try Ignite Dataset out is to run a [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interact with it using Ignite Dataset. Such a container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine: + +``` +docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist +``` + +After that you will be able to work with it in the following way: + +![ignite-dataset-mnist](https://s3.amazonaws.com/helloworld23423423ew23/ignite-dataset-mnist.png "Ignite Dataset Mnist") + +## Limitations + +Presently Ignite Dataset works with the assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects: Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures.
\ No newline at end of file diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py new file mode 100644 index 0000000000..468920a557 --- /dev/null +++ b/tensorflow/contrib/ignite/__init__.py @@ -0,0 +1,42 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Apache Ignite is a memory-centric distributed database, caching, and + processing platform for transactional, analytical, and streaming workloads, + delivering in-memory speeds at petabyte scale. This contrib package + contains an integration between Apache Ignite and TensorFlow. The + integration is based on tf.data from TensorFlow side and Binary Client + Protocol from Apache Ignite side. It allows to use Apache Ignite as a + datasource for neural network training, inference and all other + computations supported by TensorFlow. Ignite Dataset is based on Apache + Ignite Binary Client Protocol: + https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. 
+ +@@IgniteDataset +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \ +import IgniteDataset + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + "IgniteDataset", +] + +remove_undocumented(__name__) diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc new file mode 100644 index 0000000000..bf0ef8766e --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -0,0 +1,304 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_binary_object_parser.h" + +namespace ignite { + +tensorflow::Status BinaryObjectParser::Parse( + uint8_t*& ptr, std::vector& out_tensors, + std::vector& types) { + uint8_t object_type_id = *ptr; + ptr += 1; + + switch (object_type_id) { + case BYTE: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT8, {}); + tensor.scalar()() = *((uint8_t*)ptr); + ptr += 1; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case SHORT: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT16, {}); + tensor.scalar()() = *((int16_t*)ptr); + ptr += 2; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case INT: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT32, {}); + tensor.scalar()() = *((int32_t*)ptr); + ptr += 4; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case LONG: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT64, {}); + tensor.scalar()() = *((int64_t*)ptr); + ptr += 8; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case FLOAT: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_FLOAT, {}); + tensor.scalar()() = *((float*)ptr); + ptr += 4; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case DOUBLE: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_DOUBLE, {}); + tensor.scalar()() = *((double*)ptr); + ptr += 8; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case UCHAR: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT16, {}); + tensor.scalar()() = *((uint16_t*)ptr); + ptr += 2; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case BOOL: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_BOOL, {}); + tensor.scalar()() = *((bool*)ptr); + ptr += 1; + 
out_tensors.emplace_back(std::move(tensor)); + + break; + } + case STRING: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_STRING, {}); + tensor.scalar()() = std::string((char*)ptr, length); + ptr += length; + out_tensors.emplace_back(std::move(tensor)); + + break; + } + case DATE: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT64, {}); + tensor.scalar()() = *((int64_t*)ptr); + ptr += 8; + out_tensors.emplace_back(std::move(tensor)); + + break; + } + case BYTE_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT8, + tensorflow::TensorShape({length})); + + uint8_t* arr = (uint8_t*)ptr; + ptr += length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case SHORT_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT16, + tensorflow::TensorShape({length})); + + int16_t* arr = (int16_t*)ptr; + ptr += length * 2; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case INT_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT32, + tensorflow::TensorShape({length})); + + int32_t* arr = (int32_t*)ptr; + ptr += length * 4; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case LONG_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT64, + tensorflow::TensorShape({length})); + + int64_t* arr = (int64_t*)ptr; + ptr += length * 8; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case FLOAT_ARR: { + 
int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_FLOAT, + tensorflow::TensorShape({length})); + + float* arr = (float*)ptr; + ptr += 4 * length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case DOUBLE_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_DOUBLE, + tensorflow::TensorShape({length})); + + double* arr = (double*)ptr; + ptr += 8 * length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case UCHAR_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT16, + tensorflow::TensorShape({length})); + + uint16_t* arr = (uint16_t*)ptr; + ptr += length * 2; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case BOOL_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_BOOL, + tensorflow::TensorShape({length})); + + bool* arr = (bool*)ptr; + ptr += length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case STRING_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_STRING, + tensorflow::TensorShape({length})); + + for (int32_t i = 0; i < length; i++) { + int32_t str_length = *((int32_t*)ptr); + ptr += 4; + const int8_t* str = (const int8_t*)ptr; + ptr += str_length; + tensor.vec()(i) = std::string((char*)str, str_length); + } + + out_tensors.emplace_back(std::move(tensor)); + break; + } + case DATE_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + 
tensorflow::DT_INT64, + tensorflow::TensorShape({length})); + int64_t* arr = (int64_t*)ptr; + ptr += length * 8; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case WRAPPED_OBJ: { + int32_t byte_arr_size = *((int32_t*)ptr); + ptr += 4; + + tensorflow::Status status = Parse(ptr, out_tensors, types); + if (!status.ok()) return status; + + int32_t offset = *((int32_t*)ptr); + ptr += 4; + + break; + } + case COMPLEX_OBJ: { + uint8_t version = *ptr; + ptr += 1; + int16_t flags = *((int16_t*)ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 + ptr += 2; + int32_t type_id = *((int32_t*)ptr); + ptr += 4; + int32_t hash_code = *((int32_t*)ptr); + ptr += 4; + int32_t length = *((int32_t*)ptr); + ptr += 4; + int32_t schema_id = *((int32_t*)ptr); + ptr += 4; + int32_t schema_offset = *((int32_t*)ptr); + ptr += 4; + + uint8_t* end = ptr + schema_offset - 24; + int32_t i = 0; + while (ptr < end) { + i++; + tensorflow::Status status = Parse(ptr, out_tensors, types); + if (!status.ok()) return status; + } + + ptr += (length - schema_offset); + + break; + } + default: { + return tensorflow::errors::Internal("Unknowd binary type (type id ", + (int)object_type_id, ")"); + } + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h new file mode 100644 index 0000000000..1e845cbc56 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -0,0 +1,54 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/core/status.h" + +namespace ignite { + +class BinaryObjectParser { + public: + tensorflow::Status Parse(uint8_t*& ptr, + std::vector& out_tensors, + std::vector& types); +}; + +enum ObjectType { + BYTE = 1, + SHORT = 2, + INT = 3, + LONG = 4, + FLOAT = 5, + DOUBLE = 6, + UCHAR = 7, + BOOL = 8, + STRING = 9, + DATE = 11, + BYTE_ARR = 12, + SHORT_ARR = 13, + INT_ARR = 14, + LONG_ARR = 15, + FLOAT_ARR = 16, + DOUBLE_ARR = 17, + UCHAR_ARR = 18, + BOOL_ARR = 19, + STRING_ARR = 20, + DATE_ARR = 22, + WRAPPED_OBJ = 27, + COMPLEX_OBJ = 103 +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc new file mode 100644 index 0000000000..5a8eddb944 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_client.cc @@ -0,0 +1,55 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +namespace ignite { + +tensorflow::Status Client::ReadByte(uint8_t& data) { + return ReadData((uint8_t*)&data, 1); +} + +tensorflow::Status Client::ReadShort(int16_t& data) { + return ReadData((uint8_t*)&data, 2); +} + +tensorflow::Status Client::ReadInt(int32_t& data) { + return ReadData((uint8_t*)&data, 4); +} + +tensorflow::Status Client::ReadLong(int64_t& data) { + return ReadData((uint8_t*)&data, 8); +} + +tensorflow::Status Client::WriteByte(uint8_t data) { + return WriteData((uint8_t*)&data, 1); +} + +tensorflow::Status Client::WriteShort(int16_t data) { + return WriteData((uint8_t*)&data, 2); +} + +tensorflow::Status Client::WriteInt(int32_t data) { + return WriteData((uint8_t*)&data, 4); +} + +tensorflow::Status Client::WriteLong(int64_t data) { + return WriteData((uint8_t*)&data, 8); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h new file mode 100644 index 0000000000..64e28d75f0 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -0,0 +1,40 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/lib/core/status.h" + +namespace ignite { + +class Client { + public: + virtual tensorflow::Status Connect() = 0; + virtual tensorflow::Status Disconnect() = 0; + virtual bool IsConnected() = 0; + virtual int GetSocketDescriptor() = 0; + + virtual tensorflow::Status ReadByte(uint8_t& data); + virtual tensorflow::Status ReadShort(int16_t& data); + virtual tensorflow::Status ReadInt(int32_t& data); + virtual tensorflow::Status ReadLong(int64_t& data); + virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0; + + virtual tensorflow::Status WriteByte(uint8_t data); + virtual tensorflow::Status WriteShort(int16_t data); + virtual tensorflow::Status WriteInt(int32_t data); + virtual tensorflow::Status WriteLong(int64_t data); + virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0; +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc new file mode 100644 index 0000000000..a9bf26955b --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -0,0 +1,123 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_dataset_iterator.h" +#include "tensorflow/core/platform/logging.h" + +namespace ignite { + +IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx, + std::string cache_name, std::string host, + tensorflow::int32 port, bool local, + tensorflow::int32 part, + tensorflow::int32 page_size, std::string username, + std::string password, std::string certfile, + std::string keyfile, std::string cert_password, + std::vector schema, + std::vector permutation) + : DatasetBase(tensorflow::DatasetContext(ctx)), + cache_name(cache_name), + host(host), + port(port), + local(local), + part(part), + page_size(page_size), + username(username), + password(password), + certfile(certfile), + keyfile(keyfile), + cert_password(cert_password), + schema(schema), + permutation(permutation) { + SchemaToTypes(); + SchemaToShapes(); + + LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name + << "', host='" << host << "', port=" << port << ", local=" << local + << ", part=" << part << ", page_size=" << page_size + << ", username='" << username << "', certfile='" << certfile + << "', keyfile='" << keyfile + "']"; +} + +IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } + +std::unique_ptr IgniteDataset::MakeIteratorInternal( + const tensorflow::string& prefix) const { + return std::unique_ptr(new IgniteDatasetIterator( + {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host, + this->port, this->cache_name, this->local, this->part, this->page_size, + this->username, this->password, this->certfile, this->keyfile, + this->cert_password, this->schema, this->permutation)); +} + +const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const { + return dtypes; +} + +const std::vector& +IgniteDataset::output_shapes() const { + return shapes; +} + +tensorflow::string IgniteDataset::DebugString() const { + return 
"IgniteDatasetOp::Dataset"; +} + +tensorflow::Status IgniteDataset::AsGraphDefInternal( + tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, + tensorflow::Node** output) const { + return tensorflow::errors::Unimplemented( + "IgniteDataset does not support 'AsGraphDefInternal'"); +} + +void IgniteDataset::SchemaToTypes() { + for (auto e : schema) { + if (e == BYTE || e == BYTE_ARR) { + dtypes.push_back(tensorflow::DT_UINT8); + } else if (e == SHORT || e == SHORT_ARR) { + dtypes.push_back(tensorflow::DT_INT16); + } else if (e == INT || e == INT_ARR) { + dtypes.push_back(tensorflow::DT_INT32); + } else if (e == LONG || e == LONG_ARR) { + dtypes.push_back(tensorflow::DT_INT64); + } else if (e == FLOAT || e == FLOAT_ARR) { + dtypes.push_back(tensorflow::DT_FLOAT); + } else if (e == DOUBLE || e == DOUBLE_ARR) { + dtypes.push_back(tensorflow::DT_DOUBLE); + } else if (e == UCHAR || e == UCHAR_ARR) { + dtypes.push_back(tensorflow::DT_UINT8); + } else if (e == BOOL || e == BOOL_ARR) { + dtypes.push_back(tensorflow::DT_BOOL); + } else if (e == STRING || e == STRING_ARR) { + dtypes.push_back(tensorflow::DT_STRING); + } else { + LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; + } + } +} + +void IgniteDataset::SchemaToShapes() { + for (auto e : schema) { + if (e >= 1 && e < 10) { + shapes.push_back(tensorflow::PartialTensorShape({})); + } else if (e >= 12 && e < 21) { + shapes.push_back(tensorflow::PartialTensorShape({-1})); + } else { + LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; + } + } +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h new file mode 100644 index 0000000000..2120dfd342 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/dataset.h" + +namespace ignite { + +class IgniteDataset : public tensorflow::DatasetBase { + public: + IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name, + std::string host, tensorflow::int32 port, bool local, + tensorflow::int32 part, tensorflow::int32 page_size, + std::string username, std::string password, + std::string certfile, std::string keyfile, + std::string cert_password, + std::vector schema, + std::vector permutation); + ~IgniteDataset(); + std::unique_ptr MakeIteratorInternal( + const tensorflow::string& prefix) const override; + const tensorflow::DataTypeVector& output_dtypes() const override; + const std::vector& output_shapes() + const override; + tensorflow::string DebugString() const override; + + protected: + tensorflow::Status AsGraphDefInternal( + tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, + tensorflow::Node** output) const override; + + private: + const std::string cache_name; + const std::string host; + const tensorflow::int32 port; + const bool local; + const tensorflow::int32 part; + const tensorflow::int32 page_size; + const std::string username; + const std::string password; + const std::string certfile; + const std::string keyfile; + const std::string cert_password; + const std::vector schema; + const std::vector permutation; + + 
tensorflow::DataTypeVector dtypes; + std::vector shapes; + + void SchemaToTypes(); + void SchemaToShapes(); +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc new file mode 100644 index 0000000000..03cc3c1291 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -0,0 +1,447 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_dataset_iterator.h" + +#include "ignite_plain_client.h" +#include "ignite_ssl_wrapper.h" +#include "tensorflow/core/platform/logging.h" + +#include +#include + +namespace ignite { + +#define CHECK_STATUS(status) \ + if (!status.ok()) return status; + +IgniteDatasetIterator::IgniteDatasetIterator( + const Params& params, std::string host, tensorflow::int32 port, + std::string cache_name, bool local, tensorflow::int32 part, + tensorflow::int32 page_size, std::string username, std::string password, + std::string certfile, std::string keyfile, std::string cert_password, + std::vector schema, + std::vector permutation) + : tensorflow::DatasetIterator(params), + cache_name(cache_name), + local(local), + part(part), + page_size(page_size), + username(username), + password(password), + schema(schema), + permutation(permutation), + remainder(-1), + cursor_id(-1), + last_page(false) { + Client* p_client = new PlainClient(host, port); + + if (certfile.empty()) + client = std::unique_ptr(p_client); + else + client = std::unique_ptr(new SslWrapper( + std::unique_ptr(p_client), certfile, keyfile, cert_password)); + + LOG(INFO) << "Ignite Dataset Iterator created"; +} + +IgniteDatasetIterator::~IgniteDatasetIterator() { + tensorflow::Status status = CloseConnection(); + if (!status.ok()) LOG(ERROR) << status.ToString(); + + LOG(INFO) << "Ignite Dataset Iterator destroyed"; +} + +tensorflow::Status IgniteDatasetIterator::EstablishConnection() { + if (!client->IsConnected()) { + tensorflow::Status status = client->Connect(); + if (!status.ok()) return status; + + status = Handshake(); + if (!status.ok()) { + tensorflow::Status disconnect_status = client->Disconnect(); + if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); + + return status; + } + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::CloseConnection() { + if 
(cursor_id != -1 && !last_page) { + tensorflow::Status conn_status = EstablishConnection(); + if (!conn_status.ok()) return conn_status; + + CHECK_STATUS(client->WriteInt(18)); // Message length + CHECK_STATUS( + client->WriteShort(close_connection_opcode)); // Operation code + CHECK_STATUS(client->WriteLong(0)); // Request ID + CHECK_STATUS(client->WriteLong(cursor_id)); // Resource ID + + int32_t res_len; + CHECK_STATUS(client->ReadInt(res_len)); + if (res_len < 12) + return tensorflow::errors::Internal( + "Close Resource Response is corrupted"); + + int64_t req_id; + CHECK_STATUS(client->ReadLong(req_id)); + int32_t status; + CHECK_STATUS(client->ReadInt(status)); + if (status != 0) { + uint8_t err_msg_header; + CHECK_STATUS(client->ReadByte(err_msg_header)); + if (err_msg_header == string_val) { + int32_t err_msg_length; + CHECK_STATUS(client->ReadInt(err_msg_length)); + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + std::string err_msg((char*)err_msg_c, err_msg_length); + delete[] err_msg_c; + + return tensorflow::errors::Internal("Close Resource Error [status=", + status, ", message=", err_msg, "]"); + } + return tensorflow::errors::Internal("Close Resource Error [status=", + status, "]"); + } + + LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; + + cursor_id = -1; + + return client->Disconnect(); + } else { + LOG(INFO) << "Query Cursor " << cursor_id << " is already closed"; + } + + return client->IsConnected() ? 
client->Disconnect() + : tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::GetNextInternal( + tensorflow::IteratorContext* ctx, + std::vector* out_tensors, bool* end_of_sequence) { + if (remainder == 0 && last_page) { + LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; + + cursor_id = -1; + *end_of_sequence = true; + return tensorflow::Status::OK(); + } else { + tensorflow::Status status = EstablishConnection(); + if (!status.ok()) return status; + + if (remainder == -1 || remainder == 0) { + tensorflow::Status status = + remainder == -1 ? ScanQuery() : LoadNextPage(); + if (!status.ok()) return status; + } + + uint8_t* initial_ptr = ptr; + std::vector types; + std::vector tensors; + + status = parser.Parse(ptr, tensors, types); // Parse key + if (!status.ok()) return status; + + status = parser.Parse(ptr, tensors, types); // Parse val + if (!status.ok()) return status; + + remainder -= (ptr - initial_ptr); + + out_tensors->resize(tensors.size()); + for (int32_t i = 0; i < tensors.size(); i++) + (*out_tensors)[permutation[i]] = std::move(tensors[i]); + + *end_of_sequence = false; + return tensorflow::Status::OK(); + } + + *end_of_sequence = true; + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::SaveInternal( + tensorflow::IteratorStateWriter* writer) { + return tensorflow::errors::Unimplemented( + "Iterator for IgniteDataset does not support 'SaveInternal'"); +} + +tensorflow::Status IgniteDatasetIterator::RestoreInternal( + tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) { + return tensorflow::errors::Unimplemented( + "Iterator for IgniteDataset does not support 'RestoreInternal')"); +} + +tensorflow::Status IgniteDatasetIterator::Handshake() { + int32_t msg_len = 8; + + if (username.empty()) + msg_len += 1; + else + msg_len += 5 + username.length(); + + if (password.empty()) + msg_len += 1; + else + msg_len += 5 + password.length(); + + 
CHECK_STATUS(client->WriteInt(msg_len)); + CHECK_STATUS(client->WriteByte(1)); + CHECK_STATUS(client->WriteShort(protocol_major_version)); + CHECK_STATUS(client->WriteShort(protocol_minor_version)); + CHECK_STATUS(client->WriteShort(protocol_patch_version)); + CHECK_STATUS(client->WriteByte(2)); + if (username.empty()) { + CHECK_STATUS(client->WriteByte(null_val)); + } else { + CHECK_STATUS(client->WriteByte(string_val)); + CHECK_STATUS(client->WriteInt(username.length())); + CHECK_STATUS( + client->WriteData((uint8_t*)username.c_str(), username.length())); + } + + if (password.empty()) { + CHECK_STATUS(client->WriteByte(null_val)); + } else { + CHECK_STATUS(client->WriteByte(string_val)); + CHECK_STATUS(client->WriteInt(password.length())); + CHECK_STATUS( + client->WriteData((uint8_t*)password.c_str(), password.length())); + } + + int32_t handshake_res_len; + CHECK_STATUS(client->ReadInt(handshake_res_len)); + uint8_t handshake_res; + CHECK_STATUS(client->ReadByte(handshake_res)); + + LOG(INFO) << "Handshake length " << handshake_res_len << ", res " + << (int16_t)handshake_res; + + if (handshake_res != 1) { + int16_t serv_ver_major; + CHECK_STATUS(client->ReadShort(serv_ver_major)); + int16_t serv_ver_minor; + CHECK_STATUS(client->ReadShort(serv_ver_minor)); + int16_t serv_ver_patch; + CHECK_STATUS(client->ReadShort(serv_ver_patch)); + uint8_t header; + CHECK_STATUS(client->ReadByte(header)); + + if (header == string_val) { + int32_t length; + CHECK_STATUS(client->ReadInt(length)); + uint8_t* err_msg_c = new uint8_t[length]; + CHECK_STATUS(client->ReadData(err_msg_c, length)); + std::string err_msg((char*)err_msg_c, length); + delete[] err_msg_c; + + return tensorflow::errors::Internal( + "Handshake Error [result=", handshake_res, ", version=", + serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, + ", message='", err_msg, "']"); + } else if (header == null_val) { + return tensorflow::errors::Internal( + "Handshake Error [result=", handshake_res, ", 
version=", + serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + } else { + return tensorflow::errors::Internal( + "Handshake Error [result=", handshake_res, ", version=", + serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + } + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::ScanQuery() { + CHECK_STATUS(client->WriteInt(25)); // Message length + CHECK_STATUS(client->WriteShort(scan_query_opcode)); // Operation code + CHECK_STATUS(client->WriteLong(0)); // Request ID + CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name))); // Cache name + CHECK_STATUS(client->WriteByte(0)); // Flags + CHECK_STATUS(client->WriteByte(null_val)); // Filter object + CHECK_STATUS(client->WriteInt(page_size)); // Cursor page size + CHECK_STATUS(client->WriteInt(part)); // Partition to query + CHECK_STATUS(client->WriteByte(local)); // Local flag + + int64_t wait_start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + int32_t res_len; + CHECK_STATUS(client->ReadInt(res_len)); + + int64_t wait_stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms"; + + if (res_len < 12) + return tensorflow::errors::Internal("Scan Query Response is corrupted"); + + int64_t req_id; + CHECK_STATUS(client->ReadLong(req_id)); + + int32_t status; + CHECK_STATUS(client->ReadInt(status)); + + if (status != 0) { + uint8_t err_msg_header; + CHECK_STATUS(client->ReadByte(err_msg_header)); + + if (err_msg_header == string_val) { + int32_t err_msg_length; + CHECK_STATUS(client->ReadInt(err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + std::string err_msg((char*)err_msg_c, err_msg_length); + delete[] err_msg_c; + + return tensorflow::errors::Internal("Scan Query Error [status=", status, + ", 
message=", err_msg, "]"); + } + return tensorflow::errors::Internal("Scan Query Error [status=", status, + "]"); + } + + CHECK_STATUS(client->ReadLong(cursor_id)); + + LOG(INFO) << "Query Cursor " << cursor_id << " is opened"; + + int32_t row_cnt; + CHECK_STATUS(client->ReadInt(row_cnt)); + + remainder = res_len - 25; + page = std::unique_ptr(new uint8_t[remainder]); + ptr = page.get(); + + int64_t start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + CHECK_STATUS(client->ReadData(ptr, remainder)); + + int64_t stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + ; + + double size_in_mb = 1.0 * remainder / 1024 / 1024; + double time_in_s = 1.0 * (stop - start) / 1000; + LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 + << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; + + uint8_t last_page_b; + CHECK_STATUS(client->ReadByte(last_page_b)); + + last_page = !last_page_b; + + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::LoadNextPage() { + CHECK_STATUS(client->WriteInt(18)); // Message length + CHECK_STATUS(client->WriteShort(load_next_page_opcode)); // Operation code + CHECK_STATUS(client->WriteLong(0)); // Request ID + CHECK_STATUS(client->WriteLong(cursor_id)); // Cursor ID + + int64_t wait_start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + int32_t res_len; + CHECK_STATUS(client->ReadInt(res_len)); + + int64_t wait_stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms"; + + if (res_len < 12) + return tensorflow::errors::Internal("Load Next Page Response is corrupted"); + + int64_t req_id; + CHECK_STATUS(client->ReadLong(req_id)); + + int32_t status; + CHECK_STATUS(client->ReadInt(status)); + + if (status != 0) 
{ + uint8_t err_msg_header; + CHECK_STATUS(client->ReadByte(err_msg_header)); + + if (err_msg_header == string_val) { + int32_t err_msg_length; + CHECK_STATUS(client->ReadInt(err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + std::string err_msg((char*)err_msg_c, err_msg_length); + delete[] err_msg_c; + + return tensorflow::errors::Internal("Load Next Page Error [status=", + status, ", message=", err_msg, "]"); + } + return tensorflow::errors::Internal("Load Next Page Error [status=", status, + "]"); + } + + int32_t row_cnt; + CHECK_STATUS(client->ReadInt(row_cnt)); + + remainder = res_len - 17; + page = std::unique_ptr(new uint8_t[remainder]); + ptr = page.get(); + + int64_t start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + CHECK_STATUS(client->ReadData(ptr, remainder)); + + int64_t stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + ; + + double size_in_mb = 1.0 * remainder / 1024 / 1024; + double time_in_s = 1.0 * (stop - start) / 1000; + LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 + << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; + + uint8_t last_page_b; + CHECK_STATUS(client->ReadByte(last_page_b)); + + last_page = !last_page_b; + + return tensorflow::Status::OK(); +} + +int32_t IgniteDatasetIterator::JavaHashCode(std::string str) { + int32_t h = 0; + for (char& c : str) { + h = 31 * h + c; + } + return h; +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h new file mode 100644 index 0000000000..d1df4527f9 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -0,0 +1,87 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_binary_object_parser.h" +#include "ignite_dataset.h" + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +namespace ignite { + +class IgniteDatasetIterator + : public tensorflow::DatasetIterator { + public: + IgniteDatasetIterator(const Params& params, std::string host, + tensorflow::int32 port, std::string cache_name, + bool local, tensorflow::int32 part, + tensorflow::int32 page_size, std::string username, + std::string password, std::string certfile, + std::string keyfile, std::string cert_password, + std::vector schema, + std::vector permutation); + ~IgniteDatasetIterator(); + tensorflow::Status GetNextInternal( + tensorflow::IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override; + + protected: + tensorflow::Status SaveInternal( + tensorflow::IteratorStateWriter* writer) override; + tensorflow::Status RestoreInternal( + tensorflow::IteratorContext* ctx, + tensorflow::IteratorStateReader* reader) override; + + private: + std::unique_ptr client; + BinaryObjectParser parser; + + const std::string cache_name; + const bool local; + const tensorflow::int32 part; + const tensorflow::int32 page_size; + const std::string username; + const std::string password; + const std::vector schema; + const std::vector permutation; + + int32_t remainder; + int64_t cursor_id; + bool 
last_page; + + std::unique_ptr page; + uint8_t* ptr; + + tensorflow::Status EstablishConnection(); + tensorflow::Status CloseConnection(); + tensorflow::Status Handshake(); + tensorflow::Status ScanQuery(); + tensorflow::Status LoadNextPage(); + int32_t JavaHashCode(std::string str); +}; + +constexpr uint8_t null_val = 101; +constexpr uint8_t string_val = 9; +constexpr uint8_t protocol_major_version = 1; +constexpr uint8_t protocol_minor_version = 1; +constexpr uint8_t protocol_patch_version = 0; +constexpr int16_t scan_query_opcode = 2000; +constexpr int16_t load_next_page_opcode = 2001; +constexpr int16_t close_connection_opcode = 0; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc new file mode 100644 index 0000000000..543b5e4afc --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -0,0 +1,145 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_dataset.h" +#include +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { + +class IgniteDatasetOp : public DatasetOpKernel { + public: + using DatasetOpKernel::DatasetOpKernel; + + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + std::string cache_name = ""; + std::string host = ""; + int32 port = -1; + bool local = false; + int32 part = -1; + int32 page_size = -1; + std::string username = ""; + std::string password = ""; + std::string certfile = ""; + std::string keyfile = ""; + std::string cert_password = ""; + + const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME"); + const char* env_host = std::getenv("IGNITE_DATASET_HOST"); + const char* env_port = std::getenv("IGNITE_DATASET_PORT"); + const char* env_local = std::getenv("IGNITE_DATASET_LOCAL"); + const char* env_part = std::getenv("IGNITE_DATASET_PART"); + const char* env_page_size = std::getenv("IGNITE_DATASET_PAGE_SIZE"); + const char* env_username = std::getenv("IGNITE_DATASET_USERNAME"); + const char* env_password = std::getenv("IGNITE_DATASET_PASSWORD"); + const char* env_certfile = std::getenv("IGNITE_DATASET_CERTFILE"); + const char* env_keyfile = std::getenv("IGNITE_DATASET_KEYFILE"); + const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD"); + + if (env_cache_name) + cache_name = std::string(env_cache_name); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cache_name", + &cache_name)); + + if (env_host) + host = std::string(env_host); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); + + if (env_port) + port = atoi(env_port); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "port", &port)); + + if (env_local) + local = true; + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "local", &local)); + + if (env_part) + part = atoi(env_part); + else + OP_REQUIRES_OK(ctx, 
ParseScalarArgument(ctx, "part", &part)); + + if (env_page_size) + page_size = atoi(env_page_size); + else + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "page_size", &page_size)); + + if (env_username) + username = std::string(env_username); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "username", &username)); + + if (env_password) + password = std::string(env_password); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "password", &password)); + + if (env_certfile) + certfile = std::string(env_certfile); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "certfile", &certfile)); + + if (env_keyfile) + keyfile = std::string(env_keyfile); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "keyfile", &keyfile)); + + if (env_cert_password) + cert_password = std::string(env_cert_password); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", + &cert_password)); + + const Tensor* schema_tensor; + OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); + OP_REQUIRES(ctx, schema_tensor->dims() == 1, + errors::InvalidArgument("`schema` must be a vector.")); + + std::vector schema; + schema.reserve(schema_tensor->NumElements()); + for (int i = 0; i < schema_tensor->NumElements(); i++) { + schema.push_back(schema_tensor->flat()(i)); + } + + const Tensor* permutation_tensor; + OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor)); + OP_REQUIRES(ctx, schema_tensor->dims() == 1, + errors::InvalidArgument("`permutation` must be a vector.")); + + std::vector permutation; + permutation.reserve(permutation_tensor->NumElements()); + for (int i = 0; i < permutation_tensor->NumElements(); i++) { + permutation.push_back(permutation_tensor->flat()(i)); + } + + *output = new ignite::IgniteDataset( + ctx, cache_name, host, port, local, part, page_size, username, password, + certfile, keyfile, cert_password, std::move(schema), + std::move(permutation)); + } +}; + 
+REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU), + IgniteDatasetOp); + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h new file mode 100644 index 0000000000..5491af68d6 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +#include + +namespace ignite { + +class PlainClient : public Client { + public: + PlainClient(std::string host, int port); + ~PlainClient(); + + virtual tensorflow::Status Connect(); + virtual tensorflow::Status Disconnect(); + virtual bool IsConnected(); + virtual int GetSocketDescriptor(); + virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); + virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + + private: + std::string host; + int port; + int sock; +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc new file mode 100644 index 0000000000..dbfa4f8786 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -0,0 +1,132 @@ +/* Copyright 2018 The TensorFlow 
Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_plain_client.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +namespace ignite { + +PlainClient::PlainClient(std::string host, int port) + : host(host), port(port), sock(-1) {} + +PlainClient::~PlainClient() { + if (IsConnected()) { + tensorflow::Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } +} + +tensorflow::Status PlainClient::Connect() { + if (sock == -1) { + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock == -1) + return tensorflow::errors::Internal("Failed to create socket"); + } + + sockaddr_in server; + + server.sin_addr.s_addr = inet_addr(host.c_str()); + if (server.sin_addr.s_addr == -1) { + hostent* he; + in_addr** addr_list; + + if ((he = gethostbyname(host.c_str())) == NULL) + return tensorflow::errors::Internal("Failed to resolve hostname \"", host, + "\""); + + addr_list = (in_addr**)he->h_addr_list; + if (addr_list[0] != NULL) server.sin_addr = *addr_list[0]; + } + + server.sin_family = AF_INET; + server.sin_port = htons(port); + + if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0) + return tensorflow::errors::Internal("Failed to connect to \"", host, ":", + port, "\""); + + LOG(INFO) << "Connection to \"" << 
host << ":" << port << "\" established"; + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::Disconnect() { + int close_res = close(sock); + sock = -1; + + LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed"; + + return close_res == 0 ? tensorflow::Status::OK() + : tensorflow::errors::Internal( + "Failed to correctly close connection"); +} + +bool PlainClient::IsConnected() { return sock != -1; } + +int PlainClient::GetSocketDescriptor() { return sock; } + +tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) { + int recieved = 0; + + while (recieved < length) { + int res = recv(sock, buf, length - recieved, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while reading from socket: ", res, ", ", + std::string(strerror(errno))); + + if (res == 0) + return tensorflow::errors::Internal("Server closed connection"); + + recieved += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) { + int sent = 0; + + while (sent < length) { + int res = send(sock, buf, length - sent, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while writing into socket: ", res, ", ", + std::string(strerror(errno))); + + sent += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc new file mode 100644 index 0000000000..f78c9b3627 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -0,0 +1,143 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_plain_client.h" + +#define WIN32_LEAN_AND_MEAN +#include +#include +#include + +#pragma comment(lib, "Ws2_32.lib") +#pragma comment(lib, "Mswsock.lib") +#pragma comment(lib, "AdvApi32.lib") + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +namespace ignite { + +PlainClient::PlainClient(std::string host, int port) + : host(host), port(port), sock(INVALID_SOCKET) {} + +PlainClient::~PlainClient() { + if (IsConnected()) { + tensorflow::Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } +} + +tensorflow::Status PlainClient::Connect() { + WSADATA wsaData; + addrinfo *result = NULL, *ptr = NULL, hints; + + int res = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (res != 0) + return tensorflow::errors::Internal("WSAStartup failed with error: ", res); + + ZeroMemory(&hints, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + res = + getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result); + if (res != 0) + return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res); + + for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { + sock = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); + if (sock == INVALID_SOCKET) { + WSACleanup(); + return tensorflow::errors::Internal("Socket failed with error: ", + WSAGetLastError()); + } + + res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen); + if (res 
== SOCKET_ERROR) { + closesocket(sock); + sock = INVALID_SOCKET; + continue; + } + + break; + } + + freeaddrinfo(result); + + if (sock == INVALID_SOCKET) { + WSACleanup(); + return tensorflow::errors::Internal("Unable to connect to server"); + } + + LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established"; + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::Disconnect() { + int res = shutdown(sock, SD_SEND); + closesocket(sock); + WSACleanup(); + + if (res == SOCKET_ERROR) + return tensorflow::errors::Internal("Shutdown failed with error: ", + WSAGetLastError()); + else + return tensorflow::Status::OK(); +} + +bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; } + +int PlainClient::GetSocketDescriptor() { return sock; } + +tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) { + int recieved = 0; + + while (recieved < length) { + int res = recv(sock, buf, length - recieved, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while reading from socket: ", res); + + if (res == 0) + return tensorflow::errors::Internal("Server closed connection"); + + recieved += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) { + int sent = 0; + + while (sent < length) { + int res = send(sock, buf, length - sent, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while writing into socket: ", res); + + sent += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc new file mode 100644 index 0000000000..a1101b91f3 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -0,0 +1,149 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_ssl_wrapper.h" + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +#include +#include + +namespace ignite { + +static int PasswordCb(char *buf, int size, int rwflag, void *password) { + strncpy(buf, (char *)(password), size); + buf[size - 1] = '\0'; + return (strlen(buf)); +} + +SslWrapper::SslWrapper(std::shared_ptr client, std::string certfile, + std::string keyfile, std::string cert_password) + : client(client), + certfile(certfile), + keyfile(keyfile), + cert_password(cert_password), + ctx(NULL) {} + +SslWrapper::~SslWrapper() { + if (IsConnected()) { + tensorflow::Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } + + if (ctx != NULL) { + SSL_CTX_free(ctx); + ctx = NULL; + } +} + +tensorflow::Status SslWrapper::InitSslContext() { + OpenSSL_add_all_algorithms(); + SSL_load_error_strings(); + + ctx = SSL_CTX_new(SSLv23_method()); + if (ctx == NULL) + return tensorflow::errors::Internal("Couldn't create SSL context"); + + SSL_CTX_set_default_passwd_cb(ctx, PasswordCb); + SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str()); + + if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1) + return tensorflow::errors::Internal( + "Couldn't load cetificate chain (file '", certfile, "')"); + + std::string private_key_file = 
keyfile.empty() ? certfile : keyfile; + if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(), + SSL_FILETYPE_PEM) != 1) + return tensorflow::errors::Internal("Couldn't load private key (file '", + private_key_file, "')"); + + return tensorflow::Status::OK(); +} + +tensorflow::Status SslWrapper::Connect() { + tensorflow::Status status; + + if (ctx == NULL) { + status = InitSslContext(); + if (!status.ok()) return status; + } + + ssl = SSL_new(ctx); + if (ssl == NULL) + return tensorflow::errors::Internal("Failed to establish SSL connection"); + + status = client->Connect(); + if (!status.ok()) return status; + + SSL_set_fd(ssl, client->GetSocketDescriptor()); + if (SSL_connect(ssl) != 1) + return tensorflow::errors::Internal("Failed to establish SSL connection"); + + LOG(INFO) << "SSL connection established"; + + return tensorflow::Status::OK(); +} + +tensorflow::Status SslWrapper::Disconnect() { + SSL_free(ssl); + + LOG(INFO) << "SSL connection closed"; + + return client->Disconnect(); +} + +bool SslWrapper::IsConnected() { return client->IsConnected(); } + +int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); } + +tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { + int recieved = 0; + + while (recieved < length) { + int res = SSL_read(ssl, buf, length - recieved); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while reading from SSL socket: ", res); + + if (res == 0) + return tensorflow::errors::Internal("Server closed SSL connection"); + + recieved += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { + int sent = 0; + + while (sent < length) { + int res = SSL_write(ssl, buf, length - sent); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while writing into socket: ", res); + + sent += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite 
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h new file mode 100644 index 0000000000..e0c2a242dc --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +#include +#include + +namespace ignite { + +class SslWrapper : public Client { + public: + SslWrapper(std::shared_ptr client, std::string certfile, + std::string keyfile, std::string cert_password); + ~SslWrapper(); + + virtual tensorflow::Status Connect(); + virtual tensorflow::Status Disconnect(); + virtual bool IsConnected(); + virtual int GetSocketDescriptor(); + virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); + virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + + private: + std::shared_ptr client; + std::string certfile; + std::string keyfile; + std::string cert_password; + SSL_CTX* ctx; + SSL* ssl; + tensorflow::Status InitSslContext(); +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc new file mode 100644 index 0000000000..17494d1cfd --- /dev/null +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -0,0 +1,64 @@ +/* 
Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("IgniteDataset") + .Input("cache_name: string") + .Input("host: string") + .Input("port: int32") + .Input("local: bool") + .Input("part: int32") + .Input("page_size: int32") + .Input("username: string") + .Input("password: string") + .Input("certfile: string") + .Input("keyfile: string") + .Input("cert_password: string") + .Input("schema: int32") + .Input("permutation: int32") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Apache Ignite is a memory-centric distributed database, caching, and processing +platform for transactional, analytical, and streaming workloads, delivering +in-memory speeds at petabyte scale. This contrib package contains an +integration between Apache Ignite and TensorFlow. The integration is based on +tf.data from TensorFlow side and Binary Client Protocol from Apache Ignite side. +It allows to use Apache Ignite as a datasource for neural network training, +inference and all other computations supported by TensorFlow. Ignite Dataset +is based on Apache Ignite Binary Client Protocol. + +cache_name: Ignite Cache Name. 
+host: Ignite Thin Client Host. +port: Ignite Thin Client Port. +local: Local flag that defines that data should be fetched from local host only. +part: Partition data should be fetched from. +page_size: Page size for Ignite Thin Client. +username: Username to authenticate via Ignite Thin Client. +password: Password to authenticate via Ignite Thin Client. +certfile: SSL certificate to establish SSL connection. +keyfile: Private key file to establish SSL connection. +cert_password: SSL certificate password to establish SSL connection. +schema: Internal structure that defines schema of cache objects. +permutation: Internal structure that defines permutation of cache objects. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py new file mode 100644 index 0000000000..6fa073957a --- /dev/null +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -0,0 +1,763 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class Readable(object):
  """Base class for little-endian binary readers.

  Subclasses implement `read_data`; the `read_*` helpers decode fixed-width
  signed integers from the bytes it returns.

  NOTE: the `abc.abstractmethod` markers are advisory only. The class does not
  use `abc.ABCMeta`, so nothing prevents instantiating it directly.
  """

  @abc.abstractmethod
  def __init__(self):
    pass

  def read_byte(self):
    """Reads and returns a signed byte."""
    return self.__read("b", 1)

  def read_short(self):
    """Reads and returns a short (2 bytes, little-endian)."""
    return self.__read("h", 2)

  def read_int(self):
    """Reads and returns an int (4 bytes, little-endian)."""
    return self.__read("i", 4)

  def read_long(self):
    """Reads and returns a long (8 bytes, little-endian)."""
    return self.__read("q", 8)

  def skip(self, length):
    """Consumes and discards the specified number of bytes."""
    self.read_data(length)

  @abc.abstractmethod
  def read_data(self, length):
    """Reads the specified number of bytes and returns them as a buffer."""
    return None

  def __read(self, data_type, length):
    """Reads `length` bytes and unpacks them as `data_type` (little-endian).

    Args:
      data_type: A `struct` format character, e.g. "i".
      length: Number of bytes that format character occupies.

    Returns:
      The single unpacked value.
    """
    raw = self.read_data(length)
    return struct.unpack("<" + data_type, raw)[0]


class DataBuffer(Readable):
  """Reader over an in-memory byte buffer with a moving cursor."""

  def __init__(self, buffer):
    """Constructs a new instance of DataBuffer over the given byte buffer.

    Args:
      buffer: Buffer to be read.
    """
    Readable.__init__(self)
    self.buffer = buffer
    self.ptr = 0

  def read_data(self, length):
    """Returns the next `length` bytes and advances the cursor by `length`.

    If fewer than `length` bytes remain, the returned buffer is shorter than
    requested; the cursor still moves by the full `length`.
    """
    start = self.ptr
    self.ptr = start + length
    # Slice the requested window directly instead of first copying the whole
    # remaining tail of the buffer on every read.
    return self.buffer[start:start + length]


class TcpClient(Readable):
  """Reader/writer over a TCP socket, optionally wrapped in SSL."""

  def __init__(self, host, port, certfile=None, keyfile=None, password=None):
    """Constructs a new instance of TcpClient for the given host and port.

    The socket is created here but only connected in `__enter__`.

    Args:
      host: Host to be connected.
      port: Port to be connected.
      certfile: File in PEM format containing the certificate as well as any
        number of CA certificates needed to establish the certificate's
        authenticity. SSL is enabled only when this is given.
      keyfile: File containing the private key (otherwise the private key
        will be taken from certfile as well).
      password: Password to be used if the private key is encrypted and a
        password is necessary.

    Raises:
      Exception: If `keyfile` or `password` is given without `certfile`.
    """
    Readable.__init__(self)

    # Validate before allocating the socket so that a rejected argument
    # combination does not leave an open descriptor behind.
    if certfile is None:
      if keyfile is not None:
        raise Exception("SSL is disabled, keyfile must not be specified "
                        "(to enable SSL specify certfile)")
      if password is not None:
        raise Exception("SSL is disabled, password must not be specified "
                        "(to enable SSL specify certfile)")

    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    if certfile is not None:
      try:
        # NOTE(review): a context built this way does not verify the server
        # certificate or host name unless configured to; confirm that is
        # acceptable for the deployment.
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.load_cert_chain(certfile, keyfile, password)
        self.sock = context.wrap_socket(self.sock)
      except Exception:
        # Do not leak the socket when the certificate cannot be loaded.
        self.sock.close()
        raise

    self.host = host
    self.port = port

  def __enter__(self):
    """Connects to host and port specified in the constructor."""
    self.sock.connect((self.host, self.port))
    return self

  def __exit__(self, t, v, traceback):
    """Disconnects the socket."""
    self.sock.close()

  def write_byte(self, v):
    """Writes the specified byte."""
    self.__write(v, "b")

  def write_short(self, v):
    """Writes the specified short (2 bytes, little-endian)."""
    self.__write(v, "h")

  def write_int(self, v):
    """Writes the specified int (4 bytes, little-endian)."""
    self.__write(v, "i")

  def write_long(self, v):
    """Writes the specified long (8 bytes, little-endian)."""
    self.__write(v, "q")

  def write_string(self, v):
    """Writes the specified string as raw UTF-8 bytes (no length prefix)."""
    self.sock.sendall(v.encode("UTF-8"))

  def read_data(self, length):
    """Reads exactly `length` bytes from the socket.

    Args:
      length: Number of bytes to read.

    Returns:
      A bytes object of exactly `length` bytes (empty when `length` <= 0).

    Raises:
      EOFError: If the peer closes the connection before `length` bytes
        have arrived.
    """
    chunks = []
    remaining = length
    while remaining > 0:
      chunk = self.sock.recv(remaining)
      if not chunk:
        # recv() returning no data means the peer closed the connection;
        # without this check the loop would never terminate.
        raise EOFError(
            "Connection closed by peer with %d of %d bytes unread"
            % (remaining, length))
      chunks.append(chunk)
      remaining -= len(chunk)
    return b"".join(chunks)

  def __write(self, value, data_type):
    """Packs and writes data using the specified type (little-endian)."""
    self.sock.sendall(struct.pack("<" + data_type, value))


class BinaryType(object):
  """Plain record holding a binary type's id, name and fields."""

  def __init__(self, type_id, type_name, fields):
    """Constructs a new instance of BinaryType."""
    self.type_id = type_id
    self.type_name = type_name
    self.fields = fields


class BinaryField(object):
  """Plain record holding a binary field's name, type id and field id."""

  def __init__(self, field_name, type_id, field_id):
    """Constructs a new instance of BinaryField."""
    self.field_name = field_name
    self.type_id = type_id
    self.field_id = field_id
# Maps an Ignite binary type id to `(TensorFlow dtype, is_array)`. Type ids
# that `IgniteClient` can parse but that are missing here (10, 11, 21, 22:
# UUID/Date scalars and arrays) have no tensor representation.
types = {
    1: (dtypes.uint8, False),
    2: (dtypes.int16, False),
    3: (dtypes.int32, False),
    4: (dtypes.int64, False),
    5: (dtypes.float32, False),
    6: (dtypes.float64, False),
    7: (dtypes.uint16, False),
    8: (dtypes.bool, False),
    9: (dtypes.string, False),
    12: (dtypes.uint8, True),
    13: (dtypes.int16, True),
    14: (dtypes.int32, True),
    15: (dtypes.int64, True),
    16: (dtypes.float32, True),
    17: (dtypes.float64, True),
    18: (dtypes.uint16, True),
    19: (dtypes.bool, True),
    20: (dtypes.string, True)
}

# Payload size in bytes of every fixed-width scalar type id.
_FIXED_SCALAR_SIZES = {
    1: 1,    # Byte
    2: 2,    # Short
    3: 4,    # Integer
    4: 8,    # Long
    5: 4,    # Float
    6: 8,    # Double
    7: 2,    # Char
    8: 1,    # Bool
    10: 16,  # UUID
    11: 8,   # Date
}

# Element size in bytes of every type id whose payload is an int element
# count followed by that many fixed-width elements.
_LENGTH_PREFIXED_ELEMENT_SIZES = {
    9: 1,    # String
    12: 1,   # Byte array
    13: 2,   # Short array
    14: 4,   # Integer array
    15: 8,   # Long array
    16: 4,   # Float array
    17: 8,   # Double array
    18: 2,   # Char array
    19: 1,   # Bool array
    21: 16,  # UUID array. TODO: support NULL values.
    22: 8,   # Date array
}


class TypeTreeNode(object):
  """Node of the type tree derived from a cache object.

  A node without `fields` is a leaf describing one value; a node with
  `fields` is an object whose children are ordered by name.
  """

  def __init__(self, name, type_id, fields=None, permutation=None):
    """Constructs a new instance of TypeTreeNode.

    Args:
      name: Name of the object tree node.
      type_id: Type id of the object tree node.
      fields: List of fields (children of the object tree node).
      permutation: Permutation that should be applied to order object children.
    """
    self.name = name
    self.type_id = type_id
    self.fields = fields
    self.permutation = permutation

  def _leaf_type(self):
    """Returns the `(dtype, is_array)` entry of `types` for this leaf.

    Raises:
      Exception: If this node's type id has no entry in `types`.
    """
    # Use .get(): indexing would raise KeyError for unknown ids and make the
    # descriptive error below unreachable.
    leaf_type = types.get(self.type_id)
    if leaf_type is None:
      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
    return leaf_type

  def to_output_classes(self):
    """Formats the tree the way the dataset 'output_classes' property needs.

    Returns:
      `ops.Tensor` for a leaf, otherwise a dict keyed by child name.
    """
    if self.fields is None:
      return ops.Tensor
    return {field.name: field.to_output_classes() for field in self.fields}

  def to_output_shapes(self):
    """Formats the tree the way the dataset 'output_shapes' property needs.

    Returns:
      For a leaf, a `TensorShape` of `[None]` for array types and `[]` for
      scalars; otherwise a dict keyed by child name.

    Raises:
      Exception: If a leaf has an unsupported type id.
    """
    if self.fields is None:
      _, is_array = self._leaf_type()
      return tensor_shape.TensorShape([None] if is_array else [])
    return {field.name: field.to_output_shapes() for field in self.fields}

  def to_output_types(self):
    """Formats the tree the way the dataset 'output_types' property needs.

    Returns:
      The dtype for a leaf, otherwise a dict keyed by child name.

    Raises:
      Exception: If a leaf has an unsupported type id.
    """
    if self.fields is None:
      return self._leaf_type()[0]
    return {field.name: field.to_output_types() for field in self.fields}

  def to_flat(self):
    """Returns the type ids of all nodes in pre-order (this node first)."""
    return self.to_flat_rec([])

  def to_permutation(self):
    """Returns a permutation that should be applied to order object leafs.

    Each entry is the pre-order index of a leaf, listed in the order in which
    leaves are visited when children are walked through `permutation`.
    """
    leaf_index = {}
    self.traversal_rec(leaf_index, 0)
    visit_order = []
    self.traversal_permutation_rec(visit_order)
    return [leaf_index[leaf] for leaf in visit_order]

  def to_flat_rec(self, flat):
    """Appends this subtree's type ids to `flat` in pre-order; returns it."""
    flat.append(self.type_id)
    if self.fields is not None:
      for field in self.fields:
        field.to_flat_rec(flat)
    return flat

  def traversal_permutation_rec(self, permutation):
    """Appends leaves to `permutation`, walking children via permutation."""
    if self.fields is None:
      permutation.append(self)
      return
    for idx in self.permutation:
      self.fields[idx].traversal_permutation_rec(permutation)

  def traversal_rec(self, d, i):
    """Numbers leaves in pre-order starting at `i`, storing them in `d`.

    Returns:
      The next unused index.
    """
    if self.fields is None:
      d[self] = i
      return i + 1
    for field in self.fields:
      i = field.traversal_rec(d, i)
    return i


class IgniteClient(TcpClient):
  """IgniteClient class exposes methods to work with Apache Ignite using Thin
  client. This client works with the assumption that all objects in the cache
  have the same structure (homogeneous objects) and the cache contains at
  least one object.
  """

  def __init__(self, host, port, username=None, password=None, certfile=None,
               keyfile=None, cert_password=None):
    """Constructs a new instance of IgniteClient.

    Args:
      host: Apache Ignite Thin client host to be connected.
      port: Apache Ignite Thin client port to be connected.
      username: Apache Ignite Thin Client authentication username.
      password: Apache Ignite Thin Client authentication password.
      certfile: File in PEM format containing the certificate as well as
        any number of CA certificates needed to establish the certificate's
        authenticity.
      keyfile: File containing the private key (otherwise the private key
        will be taken from certfile as well).
      cert_password: Password to be used if the private key is encrypted and a
        password is necessary.
    """
    TcpClient.__init__(self, host, port, certfile, keyfile, cert_password)
    self.username = username
    self.password = password

  def handshake(self):
    """Makes a handshake required to be made after connect before any other
    calls.

    Raises:
      Exception: If the server does not answer with result code 1.
    """
    # Base size: operation byte, three version shorts and the client code.
    msg_len = 8
    for credential in (self.username, self.password):
      # A missing credential is a single NULL marker byte; a present one is a
      # type byte, a 4-byte length and the encoded text.
      if credential is None:
        msg_len += 1
      else:
        msg_len += 5 + self.__encoded_length(credential)

    self.write_int(msg_len)  # Message length
    self.write_byte(1)  # Handshake operation
    self.write_short(1)  # Version (1.1.0)
    self.write_short(1)
    self.write_short(0)
    self.write_byte(2)  # Thin client
    self.__write_optional_string(self.username)  # Username
    self.__write_optional_string(self.password)  # Password

    self.read_int()  # Result length
    res = self.read_byte()
    if res == 1:
      return

    serv_ver_major = self.read_short()
    serv_ver_minor = self.read_short()
    serv_ver_patch = self.read_short()
    err_msg = self.__parse_string()
    if err_msg is None:
      raise Exception(
          "Handshake Error [result=%d, version=%d.%d.%d]"
          % (res, serv_ver_major, serv_ver_minor, serv_ver_patch))
    raise Exception(
        "Handshake Error [result=%d, version=%d.%d.%d, message='%s']"
        % (res, serv_ver_major, serv_ver_minor, serv_ver_patch, err_msg))

  def get_cache_type(self, cache_name):
    """Collects type information about objects stored in the specified
    cache.

    Runs a scan query with a page size of one and derives the types from the
    first returned key/value pair.

    Args:
      cache_name: Name of the cache to inspect.

    Returns:
      A root `TypeTreeNode` with two children named "key" and "val".

    Raises:
      Exception: If the query fails or returns no rows.
    """
    cache_name_hash = self.__java_hash_code(cache_name)
    self.write_int(25)  # Message length
    self.write_short(2000)  # Operation code
    self.write_long(0)  # Request ID
    self.write_int(cache_name_hash)  # Cache name
    self.write_byte(0)  # Flags
    self.write_byte(101)  # Filter (NULL)
    self.write_int(1)  # Cursor page size
    self.write_int(-1)  # Partition to query
    self.write_byte(0)  # Local flag

    result_length = self.read_int()
    self.read_long()  # Request id
    status = self.read_int()

    if status != 0:
      err_msg = self.__parse_string()
      if err_msg is None:
        raise Exception("Scan Query Error [status=%s]" % status)
      raise Exception(
          "Scan Query Error [status=%s, message='%s']" % (status, err_msg))

    self.read_long()  # Cursor id
    row_count = self.read_int()

    if row_count == 0:
      raise Exception("Scan Query returned empty result, so it's "
                      "impossible to derive the cache type")

    # 25 = request id (8) + status (4) + cursor id (8) + row count (4) +
    # trailing "next page" flag (1); the rest is the row payload.
    payload = DataBuffer(self.read_data(result_length - 25))

    self.read_byte()  # Next page

    return TypeTreeNode("root", 0, [
        self.__collect_types("key", payload),
        self.__collect_types("val", payload)
    ], [0, 1])

  def __encoded_length(self, value):
    """Returns the number of bytes `write_string` sends for `value`."""
    # Lengths on the wire count bytes, not characters, so measure the same
    # UTF-8 encoding that `write_string` transmits.
    return len(value.encode("UTF-8"))

  def __write_optional_string(self, value):
    """Writes `value` as a typed string, or the NULL marker when it is None."""
    if value is None:
      self.write_byte(101)
      return
    self.write_byte(9)
    self.write_int(self.__encoded_length(value))
    self.write_string(value)

  def __java_hash_code(self, s):
    """Computes the hash code of the string the way Java code does.

    Returns:
      The hash as a signed 32-bit integer.
    """
    h = 0
    for c in s:
      h = (31 * h + ord(c)) & 0xFFFFFFFF
    # Fold the unsigned accumulator into the signed 32-bit range.
    return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000

  def __collect_types(self, field_name, data):
    """Extracts type information from the object at the cursor of `data`.

    Consumes exactly one serialized value from `data`.

    Args:
      field_name: Name to give the resulting node.
      data: A `Readable` positioned at the value's type id byte.

    Returns:
      A `TypeTreeNode` describing the value.

    Raises:
      Exception: If an unknown type id is encountered.
    """
    type_id = data.read_byte()

    # Fixed-width scalars.
    if type_id in _FIXED_SCALAR_SIZES:
      data.skip(_FIXED_SCALAR_SIZES[type_id])
      return TypeTreeNode(field_name, type_id)

    # Strings and arrays of fixed-width elements.
    if type_id in _LENGTH_PREFIXED_ELEMENT_SIZES:
      length = data.read_int()
      data.skip(length * _LENGTH_PREFIXED_ELEMENT_SIZES[type_id])
      return TypeTreeNode(field_name, type_id)

    # String array: every element is either a typed string or NULL.
    if type_id == 20:
      length = data.read_int()
      for _ in range(length):
        header = data.read_byte()
        if header == 9:
          str_length = data.read_int()
          data.skip(str_length)
        elif header != 101:
          raise Exception("Unknown binary type when expected string "
                          "[type_id=%d]" % header)
      return TypeTreeNode(field_name, type_id)

    # Wrapped Binary Object.
    if type_id == 27:
      length = data.read_int()
      inner_data = data.read_data(length)
      data.read_int()  # Offset
      return self.__collect_types(field_name, DataBuffer(inner_data))

    # Complex Object.
    if type_id == 103:
      data.read_byte()  # Object version
      data.read_short()  # Object flags
      obj_type_id = data.read_int()
      data.read_int()  # Object hash code
      obj_length = data.read_int()
      data.read_int()  # Object schema id
      obj_schema_offset = data.read_int()

      obj_type = self.__get_type(obj_type_id)
      children = [
          self.__collect_types(obj_field.field_name, data)
          for obj_field in obj_type.fields
      ]

      # Children are stored sorted by name; `permutation` records, for each
      # child in serialized order, its position in the sorted list.
      children_sorted = sorted(children, key=lambda child: child.name)
      permutation = [children_sorted.index(child) for child in children]

      data.skip(obj_length - obj_schema_offset)

      return TypeTreeNode(field_name, type_id, children_sorted, permutation)

    raise Exception("Unknown binary type [type_id=%d]" % type_id)

  def __get_type(self, type_id):
    """Queries Apache Ignite information about type by type id.

    Args:
      type_id: Id of the binary type to look up.

    Returns:
      A `BinaryType` with the type's id, name and fields.

    Raises:
      Exception: If the request fails, the type does not exist, or the type
        is an enum.
    """
    self.write_int(14)  # Message length
    self.write_short(3002)  # Operation code
    self.write_long(0)  # Request ID
    self.write_int(type_id)  # Type ID

    self.read_int()  # Result length
    self.read_long()  # Request id
    status = self.read_int()

    if status != 0:
      err_msg = self.__parse_string()
      # Include the message only when the server actually sent one.
      if err_msg is None:
        raise Exception("Get Binary Type Error [status=%d]" % status)
      raise Exception("Get Binary Type Error [status=%d, message='%s']"
                      % (status, err_msg))

    binary_type_exists = self.read_byte()

    if binary_type_exists == 0:
      raise Exception("Binary type not found [type_id=%d] " % type_id)

    binary_type_id = self.read_int()
    binary_type_name = self.__parse_string()
    self.__parse_string()  # Affinity field name

    fields = []
    for _ in range(self.read_int()):
      field_name = self.__parse_string()
      field_type_id = self.read_int()
      field_id = self.read_int()
      fields.append(BinaryField(field_name, field_type_id, field_id))

    is_enum = self.read_byte()
    if is_enum == 1:
      raise Exception("Enum fields are not supported yet")

    schema_cnt = self.read_int()
    for _ in range(schema_cnt):
      self.read_int()  # Schema id
      field_cnt = self.read_int()
      self.skip(field_cnt * 4)

    return BinaryType(binary_type_id, binary_type_name, fields)

  def __parse_string(self):
    """Parses a typed string from the socket.

    Returns:
      The decoded string, or None when the NULL marker was sent.

    Raises:
      Exception: If the value is neither a string nor NULL.
    """
    header = self.read_byte()
    if header == 9:
      length = self.read_int()
      # A zero-length read may yield no buffer at all; treat it as empty.
      return (self.read_data(length) or b"").decode("utf-8")
    if header == 101:
      return None
    raise Exception("Unknown binary type when expected string [type_id=%d]"
                    % header)


class IgniteDataset(Dataset):
  """Apache Ignite is a memory-centric distributed database, caching, and
  processing platform for transactional, analytical, and streaming workloads,
  delivering in-memory speeds at petabyte scale. This contrib package
  contains an integration between Apache Ignite and TensorFlow. The
  integration is based on tf.data from TensorFlow side and Binary Client
  Protocol from Apache Ignite side. It allows using Apache Ignite as a
  datasource for neural network training, inference and all other
  computations supported by TensorFlow. Ignite Dataset is based on Apache
  Ignite Binary Client Protocol.
  """

  def __init__(self, cache_name, host="localhost", port=10800, local=False,
               part=-1, page_size=100, username=None, password=None,
               certfile=None, keyfile=None, cert_password=None):
    """Create a IgniteDataset.

    Connects to Apache Ignite while constructing the dataset in order to
    derive the cache object structure from the first cache entry.

    Args:
      cache_name: Cache name to be used as datasource.
      host: Apache Ignite Thin Client host to be connected.
      port: Apache Ignite Thin Client port to be connected.
      local: Local flag that defines to query only local data.
      part: Partition data should be fetched from.
      page_size: Apache Ignite Thin Client page size.
      username: Apache Ignite Thin Client authentication username.
      password: Apache Ignite Thin Client authentication password.
      certfile: File in PEM format containing the certificate as well as
        any number of CA certificates needed to establish the certificate's
        authenticity.
      keyfile: File containing the private key (otherwise the private key
        will be taken from certfile as well).
      cert_password: Password to be used if the private key is encrypted and a
        password is necessary.
    """
    super(IgniteDataset, self).__init__()

    with IgniteClient(host, port, username, password, certfile, keyfile,
                      cert_password) as client:
      client.handshake()
      self.cache_type = client.get_cache_type(cache_name)

    self.cache_name = ops.convert_to_tensor(
        cache_name, dtype=dtypes.string, name="cache_name")
    self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host")
    self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port")
    self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local")
    self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
    self.page_size = ops.convert_to_tensor(
        page_size, dtype=dtypes.int32, name="page_size")
    # The op takes string inputs, so absent optional values become "".
    self.username = ops.convert_to_tensor(
        "" if username is None else username,
        dtype=dtypes.string, name="username")
    self.password = ops.convert_to_tensor(
        "" if password is None else password,
        dtype=dtypes.string, name="password")
    self.certfile = ops.convert_to_tensor(
        "" if certfile is None else certfile,
        dtype=dtypes.string, name="certfile")
    self.keyfile = ops.convert_to_tensor(
        "" if keyfile is None else keyfile,
        dtype=dtypes.string, name="keyfile")
    self.cert_password = ops.convert_to_tensor(
        "" if cert_password is None else cert_password,
        dtype=dtypes.string, name="cert_password")
    self.schema = ops.convert_to_tensor(
        self.cache_type.to_flat(), dtype=dtypes.int32, name="schema")
    self.permutation = ops.convert_to_tensor(
        self.cache_type.to_permutation(), dtype=dtypes.int32,
        name="permutation")

  def _as_variant_tensor(self):
    """Builds the `IgniteDataset` op from the tensors prepared in __init__."""
    return gen_dataset_ops.ignite_dataset(
        self.cache_name, self.host, self.port, self.local, self.part,
        self.page_size, self.username, self.password, self.certfile,
        self.keyfile, self.cert_password, self.schema, self.permutation)

  @property
  def output_classes(self):
    return self.cache_type.to_output_classes()

  @property
  def output_shapes(self):
    return self.cache_type.to_output_shapes()

  @property
  def output_types(self):
    return self.cache_type.to_output_types()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-plain.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh \ +-u "jdbc:ignite:thin://127.0.0.1/" \ +--run=/data/sql/init.sql + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh new file mode 100755 index 0000000000..dde1162816 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl-auth.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\ +sslMode=require&\ +sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\ +sslClientCertificateKeyStorePassword=123456&\ +sslTrustAll=true&\ +username=ignite&\ +password=ignite" --run=/data/sql/init.sql + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh new file mode 100755 index 0000000000..58b40b2738 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\ +sslMode=require&\ +sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\ +sslClientCertificateKeyStorePassword=123456&\ +sslTrustAll=true" --run=/data/sql/init.sql --verbose=true + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml new file mode 100644 index 0000000000..d900174a8a --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml new file mode 100644 index 0000000000..8e001b28ab --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml new file mode 100644 index 0000000000..42d480c114 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py new file mode 100644 index 0000000000..933e62b804 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py @@ -0,0 +1,77 @@ +# Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""Tests for IgniteDataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tensorflow as tf +from tensorflow.contrib.ignite import IgniteDataset +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + +class IgniteDatasetTest(test.TestCase): + """The Apache Ignite servers have to setup before the test and tear down + after the test manually. The docker engine has to be installed. 
+ + To setup Apache Ignite servers: + $ bash start_ignite.sh + + To tear down Apache Ignite servers: + $ bash stop_ignite.sh + """ + + def test_ignite_dataset_with_plain_client(self): + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300) + self.__check_dataset(ds) + + def test_ignite_dataset_with_ssl_client(self): + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\ + certfile=os.path.dirname(os.path.realpath(__file__)) +\ + "/keystore/client.pem", cert_password="123456") + self.__check_dataset(ds) + + def test_ignite_dataset_with_ssl_client_and_auth(self): + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\ + certfile=os.path.dirname(os.path.realpath(__file__)) +\ + "/keystore/client.pem", cert_password="123456",\ + username="ignite", password="ignite") + self.__check_dataset(ds) + + def __check_dataset(self, dataset): + """Checks that dataset provids correct data. + """ + self.assertEquals(tf.int64, dataset.output_types['key']) + self.assertEquals(tf.string, dataset.output_types['val']['NAME']) + self.assertEquals(tf.int64, dataset.output_types['val']['VAL']) + + it = dataset.make_one_shot_iterator() + ne = it.get_next() + + with tf.Session() as sess: + rows = [sess.run(ne), sess.run(ne), sess.run(ne)] + with self.assertRaises(errors.OutOfRangeError): + sess.run(ne) + + self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\ + rows[0]) + self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\ + rows[1]) + self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\ + rows[2]) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/ignite/python/tests/keystore/client.jks b/tensorflow/contrib/ignite/python/tests/keystore/client.jks new file mode 100644 index 0000000000000000000000000000000000000000..1875c71b605253603eb63e446da8f07cd84f64a0 GIT binary patch literal 3232 zcmezO_TO6u1_mZ5W@KPX&dE&8D`8+@G{5kwMTdcbX^%k@(+&eZHZE;8MixdbCP79< 
zRtAYq8p|C_4X`71ir_3D+J-^_h0{`v;Jq^I7?=cj)By2Gv~;-!V; zrk^*tG^Q9QMw#R_-+6sBwsZISYYa0FeehS&sNA)_h$$$xItuMbc#jm^U z)EsGM7v48t=yRQpg+|uW%p2uNTMy;l<&ooQ-L|Fm+-LC}Ql7ti^n{IXo#DN-aQ4pg z#|pK-#~2r{+<*SvmVdLv(*kBDMkXdk6$4&2POUbNw(q=*jI68-2IYp_2Apinp)72|OhKWB zJO*4K4u>#va6l4_#}45M8t{YUxP&>}i!xK(6EpK*3b=)tT~f$B_|rn8OXqG z;$#%#Pfm15FUm|wPt45IOU}1uEr#pj7x=Nuzv6iE!us=ylaaxU{}zcI(6gN!b!PABw|X+mmI_84U%E|> z{mQW=_S;Qr`2HyF@K#y!w@;mknUR5UakW7uIJRVkS(pqM40PBy6WTl&+kQAP^0LT_ z$@&+h<^>1)D7dHQr4}WYq^2l1rxulDre!84mZTcUvN4Cs^0A1qh&cZ>+QiDUB3!Cv z*QCz@@``+0T3;B*gQS&NBn-qFM7I9ms7T4H+0xmqwdiX68%NFUt%1m4!wL$UMuzTN zKavj~N?{J}lzRGR$J_m%x8=zk+ue9hbM_;R=>eq;wo5iLG6bg=|6T5UzizMm4#|@0 zC3nU6D?6K?{L1|Lkcsc;orbHcP1{x#s@#Ya5UGy8w0-x%FE!^&Uhc18xbL`FJu7bh zEc2IVEfii)u)od6#&v3~%l?{W-yQ1i>pxA|_=W+N&Y67;nwUKenwU00(m5x?%60vB zcixWkLv!^YNbd$_rly8Qu=H*MS!M-c39-n=bx7Ci%v0sW&eC`$Zt&A6J zx2%mi{^N~-V|3b??)9@L{4(U4*Vx*;m6w(6+2y~S{15JDb~|jS)~V&%s_R*>%7H6j zac`Q_$Ni90-+0HM@g`F8#hN-BS3=Tn<3fYRdDv6%!s;cJ0~XFrNNJJ>5CIRj);LinkxSv+h!V3q#BbzJF1B6oSQ$a90?dH&#Df;JoYLpG4o4 z_vH`lZk%qiR`UI=vL~V2U#z=$!t$%x!4s7Yn#sG}#XXJ0Jr*}aeBxPNc>0L`(;ur` z7niRTx7FTo)X_L*nZ@%!-U~AR;=(>@))7-*?Qk|v-SSgPPEf}ZT!%0vCqmjp&4Hu& pj0zRrXg*^OWubk;VkqUaBW_g*^E|kHw{Q7 +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,CE61EDD98349D0C7 + +Kzl16sj8R7YUXPCEZCqCrY4LSAjiKCRFNOagEehvN9Jpswcz4JbatoFmvVvOCgBF +7kkeCaALhfM5a+46uynZ1sOOFUOn8fUFgguN3lLInWfm6vTuXDPslg0/tRNI0YqW +ujfxyzrm1/k4RX0oLzRE1jZr69VZsBmZndkz9nkz3anWKLE7X/VIFV6U/N6YNPch +BG1Fxpt/HtM9p3B5wNDSjCVaeNP1ROKe3APLRY6k+SppTuntHV5q9Ni82r1l3ahU +zf2QvocSy9MLh+bGusJGHyJJAGuwPHm6ytPwbXGHn5xe4HPIno28j9kN7EL1ZoUs +q0PhipAkFrGIM4zg6nAwVdzY5iGySDQ3fWpz2MkrKMDRftBwA3o/M321NBUW9/2X +l+XmjXcJd0dEOslGxveb6UXLL2YvYszjQXRR4dCV/40bMJL3umRhVSay0NteoXfY +82rQchm2NHKOiDfB4RpD8JJtVQeDSMXc9TH5y2Ua7FZND60JXtFpdnfCVfVZuBJm +yBafyIsXR7EQzLG4z28Dvp4fs42A3JkF+e9Aq6Y6MmYA1wsvIKKT9HKEifqKmbgG +4E9WOZn5IWi4ZJ44VAwN/uBGrLm//3OjByeB9y8vszNbyY8dQ8x5XqnF/IzIvgqc 
+uKA8xuLAkTFmgRGQ/lmMDR+iMhet5dCtg9Orb9tYVL55JAb/OfsCX0LTJ3Y2RmIx +CaFpkUP7KKYD+69ajnFCxvfGnGxyBkf+JeuDYIZVFklVT9SUtL9RJh26jUdvHt2A +LQerBl8UCkVbPxsxYjdawvxuBNTD6tSRykM8zwtWcvIubp+gxE7png== +-----END RSA PRIVATE KEY----- +Bag Attributes + friendlyName: 1.2.840.113549.1.9.1=#1613636c69656e7440677269646761696e2e636f6d,CN=client,OU=Dev,O=GridGain,ST=SPb,C=RU + localKeyID: 54 69 6D 65 20 31 33 33 39 32 33 39 38 35 39 34 34 36 +subject=/C=RU/ST=SPb/O=GridGain/OU=Dev/CN=client/emailAddress=client@gridgain.com +issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com +-----BEGIN CERTIFICATE----- +MIIC2TCCAkKgAwIBAgIBJDANBgkqhkiG9w0BAQUFADB3MQswCQYDVQQGEwJSVTEM +MAoGA1UECBMDU1BiMQwwCgYDVQQHEwNTUGIxETAPBgNVBAoTCEdyaWRHYWluMQww +CgYDVQQLEwNEZXYxCzAJBgNVBAMTAmNhMR4wHAYJKoZIhvcNAQkBFg9jYUBncmlk +Z2Fpbi5jb20wHhcNMTIwNjA5MTEwNDE3WhcNMzIwNjA5MTEwNDE3WjBxMQswCQYD +VQQGEwJSVTEMMAoGA1UECBMDU1BiMREwDwYDVQQKEwhHcmlkR2FpbjEMMAoGA1UE +CxMDRGV2MQ8wDQYDVQQDEwZjbGllbnQxIjAgBgkqhkiG9w0BCQEWE2NsaWVudEBn +cmlkZ2Fpbi5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANIHHcYiA+CP +EBPKNZJ6mtvN4d9Yj43B5/hzs/TK3e4XImLsMhXaElYtrXQX/SDK7Zv5zdj6AkKH +QkJ9BT8Jw7wvOQx/v4Qxrl+gTgcf6gjk6DvzqMlZUwH+ohbALj2TWsy9y+0uHKal +EVrHpbYeB9TGpD+3NHwO/CG4SySk/Y4nAgMBAAGjezB5MAkGA1UdEwQCMAAwLAYJ +YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud +DgQWBBRD/TKyBQyoVxqEupLzUB8hDrSF6DAfBgNVHSMEGDAWgBS1+Ah4ZG58tImL +KqLVX+xBKbeFUTANBgkqhkiG9w0BAQUFAAOBgQCL2vhjwcJkA1OJGuXsuO2/87Zu +HMa7gc4pm+Iol1B1gD2ksQEAU2dz/adD3369H7gZdHuk3RYPeYmD5Ppp9eECDsXc +gNWrNYaqcSTYWRAUe1/St7vB9HzPdOm/eADfQaMnal6fmjfpzTgg65A/2w4GCsqt +RL98pvdAft8v5WSx7A== +-----END CERTIFICATE----- +Bag Attributes + friendlyName: 1.2.840.113549.1.9.1=#160f636140677269646761696e2e636f6d,CN=ca,OU=Dev,O=GridGain,L=SPb,ST=SPb,C=RU +subject=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com +issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com +-----BEGIN CERTIFICATE----- 
+MIIDSTCCArKgAwIBAgIJAKmuj925215OMA0GCSqGSIb3DQEBBQUAMHcxCzAJBgNV +BAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3Jp +ZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEW +D2NhQGdyaWRnYWluLmNvbTAeFw0xMjA2MDkwNjU1MTJaFw0zMjA2MDQwNjU1MTJa +MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8G +A1UEChMIR3JpZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkq +hkiG9w0BCQEWD2NhQGdyaWRnYWluLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEAtd16DCObyM63NKF/cvRcE+8cr1dc3c7mSnTEQ61WfqPJ2QqsQAB6e+5+ +q9Np1SaJyqFTTag6483ibrU+DkGPGgEXndRHtQHQPbStWsf47DBBW2bMi6+bkPox +Cp6BhYO1DQUG5tP9CQ/g32mLQLB7LH0KtS1JcKpAClCjjWZC8b8CAwEAAaOB3DCB +2TAdBgNVHQ4EFgQUtfgIeGRufLSJiyqi1V/sQSm3hVEwgakGA1UdIwSBoTCBnoAU +tfgIeGRufLSJiyqi1V/sQSm3hVGhe6R5MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQI +EwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3JpZEdhaW4xDDAKBgNVBAsT +A0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEWD2NhQGdyaWRnYWluLmNv +bYIJAKmuj925215OMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAhrzd +qusVLHO3wtyu0o+EAFyoDv5avCBTFsQLeDDPMyfDcEO6wfxhTanfH8C7gZc0rRnv +2nbkVbfortHIOfU2wch5gClju0cXSTIXSKOAWPIMp3HLxC/l+KpFo3epFz0rsMVB +M1ymOOdRDdAcTxcTTGY7WJXquEM3ZbT5Gh4RLDk= +-----END CERTIFICATE----- diff --git a/tensorflow/contrib/ignite/python/tests/keystore/server.jks b/tensorflow/contrib/ignite/python/tests/keystore/server.jks new file mode 100644 index 0000000000000000000000000000000000000000..006ececc31118aa18ddb6e4ec27d002e5e11646c GIT binary patch literal 3230 zcmezO_TO6u1_mZLW=c+EU|=-A@M=~y18anysevT}1GBF|6SJp56Vs*z%uI|-Oq>iW z*Y)4sc{|R}fR~L^tIebBJ1-+6D=ULRxgoa!CmVAp3!5-gP^ck~0T+nFAqM zGxPM4^K%X4#CZ*k49pBH4a`hU4UMA2d5u9_6DW7!9M(7=IgA)t8JHV;84MabnHn1z zw%)DcQJ#I`+;)?N^+jJ|gx|}o50ANf?wMD~5$CmGb&F5liCa029D8bXS&zVp76_%YhGh(^HyF~ zwr7|Ba`Hd8pV{rOp<1VwYpbqj!72x?fW^IOP9OI(F*7nSE^fSI(0J28mW?@7mXAe@ zMP%y_j*67Lnk}8(T8pm6zj4&u-Wq7oxDq6<%+k2fpmAOURy7N&msAc|I5#1sNgj|> zg;`h)m>C)WBZodKDD)c{+VAF{>n`=TFvP6j`xmuGAz17PcZI=uWA(!Y z&bto&N%UQLU;e=E#_1+&CEwpFdlI_+#kz|pEWer^JW<)8nY`Ow+|x+hV{t>oC!Xbn 
zr;q4A{jth*arsJdTkQ=;9gSm_Sv(Kqy&&T+F6@(L9WnLQ4rlY!EkC8?1a&MK7#J8C z*osq&%2JCUd2Mzj0|V18gC?eJ27GK>+H8z0j9N^BjEt-dEKN*{JR;+brdQtCd04*1 z$YblrpVOXBDssj0qL-e! z_3tE^3-va%w|`$L;}Nj<>_c{`U$cx$gv{s4>?kxoKIdBIo;jx!9jbh5SLQifJJrzs zKg>i-)9GBrhpt-!C&V3HuCAQCd+ibRlH+zhVzLkahd3JDTd_wa_k`B9sMrWek?13{ z*dBacz4IgAeb)@M~3AX3(WI_H1kwhYD0)`sg3qynpK@nTnN(CM+8|HvZ!6J^p;z z70HDklrAs3a-g*4a!r-uh2Qu8U1U*M%*r<}VcnA4fROmk&r_YszlR%axuOEMR(yjB1N0!)447OT5hin z`e@h^I(x}V?cc3Y48HPiKl(P^C`fr6_w|pjbFf;;&fi9lD<_^(-g^I5=a%BLPyRf3 zdU0>w&2)9mhk?Jp?O3_5|F@rk_~K+{aP0xBEtu{aG%?*YXkv1L)E11YXnAnJ3JF9l zVQ6SzVg#=xETLS3LRh7Pt@$#x zU$5)brMsWC-|mr^9a-qdEyI5}#;Jbot6!hlq^dr@w9ngL^i9Q-xAje)zPeoZQTZ3^ zI{SX>N-Unl-@TrBs?fRG_WGF4H4fa*%CAD|hiZdLaGfqI%)(^AV4%asnb79J*!IJT zk(WhYOxC|3H7_{WN5MTcFSRJKBsE3BIkl)HGc7YYu_P6#T4`B)<&VueNfCvrDJrI6 z;+q!D*D*4X2Nl)IED{D{*y|Lul*8QE$guv)rbS0jOy~Khd5i0YgcQr&WH!FHLBEbS zt>f>NJd)IQh2=oPhQ#@NHyz)uGMdNwXx?Y07a!}Fce*DBynQM9JI6`FqT^^1yQE9p z&0|vfQ<(D9Zo~)i9e#GAtb#Qsng8dHFBKsVC+4kv*LYIX@L62#V&#`f_RBX&U0c1a z>(PbZn?_qjRA{c@OZB6zBIZyQ>NJaoQmbfAcG9xQoh!B}_ieqmB!8{QbKdug03J1V AGXMYp literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/ignite/python/tests/keystore/trust.jks b/tensorflow/contrib/ignite/python/tests/keystore/trust.jks new file mode 100644 index 0000000000000000000000000000000000000000..a00f1251af72982ddcd42c0274fc7b16e35dbc4c GIT binary patch literal 2432 zcmezO_TO6u1_mYu1_ov@&6J$Tz`$sJ;nlo346G4)rUsS_49va;P0XGKO-!2>Ff%bS zF>x}iT-Seh=j}K@170>xtu~Lg@4SqRtgH+M<%Zk_oNUaYENsF|L7|2`23#NxhcI(+ zKoX3{4&ezJ@Pp*IggM-cGE>|WGxJ~yxP_TrQp;c_F$*&#CmPBb$iQvlWEA61PIO2w z%1lX5%*@kE&d)WF6X!KFGB7i+G%zzYH8hG6=QRd#O`zO?b6DeiYTK0M~`xo2J_N1WG&)h#}GlWUCwLsj*=y49C6 zuc~#PS{UrR!s_waM|oTA_#FGC7{%vaao@^#!FJ2qsN+B07&u0!o#|dbd%`b6u6d2E z&0Bd{*`8hg%gO)XerC7BhH9N!uC2PB1*;sm0v7kCIepyE#LURRxVZ6-LE}vWSvKZS zSw0pq7Llz#I4V-|YPNKCYc0AO|He^sduyOU<4TacGE3t^gT{FcSk)}7UQ#(=;oO9j zCV4IU}j|ej~x1}pwMq*Xxnpl)oW3m;_Zj-th?0T!Vt59?_bm&ggGBm3)7z>`CbM7waybu>5Lv@I+;UX7X-#aZe+0kHrlU 
zpLmuRo<5@g^v5dK#pNr-ZM8QXbu^AyX7N0b_kxVSxUf%}b;Q(HJDkl^xBQfn6V$N; z=QFnCoXpg`5=dT~Ujff+Om_{Mm~I+0F*!l<8lwuD4+boV5qZqe(7?pd9G1r{p`3Yusegj@mdB7|T4H-iv0|n$PFANDV)NGG0cU@waJ*LF`pkF}vl;>}-9-F+vnl=Q|(^p;SV?%ERZzY3?`&i;A!#xEwPb|i4%8u8Uvb$1eYh92y{F&m$t+tc7u(KqqxIcWy#+@bx7{2HmC%bh_b>gOa=@FI&7Q?Z61tmKb#nOS>(lJ{R>j_f`fe& z+*9*XixNvxQxu$2i%K%nGLsWaQjs#H^IxM)tUN2irCN4P`Wzsy$hW2Sg@HUMp)0dU z7>Hrdk!UH0xv`O<`__--gNIU>gFB_3zS;41|L1LaGRJl|p3|KDNMm|HX@l*Ojf@Pz z>BWDSJKwL{E5AdsqT1)rRfQ@yA_YXM<1cOBeeg@o z`I49WD;VxOE>_Qqn?K9^?x~m%==KUWoEfg)yv(f;8ne#OZ(yH^Yyw;UAp^O`|Tcy z*^z~Q+%o)!W1Q;OzWVi8s0iAC-TxuCwpAuEgR={N3xB zrwW~`ZLg2%T;ss~to$lf(|*h1D}QX(Ns1^`O;Iro6W_FGzK#(&X@C8fO^c46n9lQ0 z^A^_)2`QGl$!vUYgMJ-vTF2ijc_gXr3d@0n4T7ZHcaD>UMaR)3c1f4Go5!T|r!eKI-G~q3JN)cKSp{oOGXKvXUn)W#PRv{TuJNR% v;j_5f#mX;}?3Ztly0&^-*P{!+H%&8h)e~82@&Dp^^P Date: Fri, 24 Aug 2018 18:15:57 +0300 Subject: [PATCH 0103/1357] Remove duplicated header from README.md. --- tensorflow/contrib/ignite/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md index 9054344e94..f2596fc572 100644 --- a/tensorflow/contrib/ignite/README.md +++ b/tensorflow/contrib/ignite/README.md @@ -1,4 +1,3 @@ -### Ignite Dataset # Ignite Dataset - [Overview](#overview) @@ -164,4 +163,4 @@ After that you will be able to work with it following way: ## Limitations -Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. \ No newline at end of file +Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. 
Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. -- GitLab From 241c1740ee26b57b7a5fe8f72b9d34f4515af760 Mon Sep 17 00:00:00 2001 From: dmitrievanthony Date: Sun, 26 Aug 2018 16:03:04 +0000 Subject: [PATCH 0104/1357] Update after review: change 'ignite' namespace to 'tensorflow', rename variables to satisty code style, use pointers instead of references. --- tensorflow/contrib/ignite/BUILD | 1 - tensorflow/contrib/ignite/__init__.py | 4 +- .../kernels/ignite_binary_object_parser.cc | 322 +++++++--------- .../kernels/ignite_binary_object_parser.h | 9 +- .../contrib/ignite/kernels/ignite_client.cc | 55 --- .../contrib/ignite/kernels/ignite_client.h | 45 ++- .../contrib/ignite/kernels/ignite_dataset.cc | 105 +++-- .../contrib/ignite/kernels/ignite_dataset.h | 65 ++-- .../ignite/kernels/ignite_dataset_iterator.cc | 358 +++++++++--------- .../ignite/kernels/ignite_dataset_iterator.h | 80 ++-- .../ignite/kernels/ignite_dataset_ops.cc | 10 +- .../ignite/kernels/ignite_plain_client.h | 21 +- .../kernels/ignite_plain_client_unix.cc | 78 ++-- .../kernels/ignite_plain_client_windows.cc | 77 ++-- .../ignite/kernels/ignite_ssl_wrapper.cc | 107 +++--- .../ignite/kernels/ignite_ssl_wrapper.h | 30 +- 16 files changed, 619 insertions(+), 748 deletions(-) delete mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index 9f6c666893..b7d40a99f7 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -40,7 +40,6 @@ cc_library( srcs = [ "kernels/ignite_dataset_ops.cc", "kernels/ignite_client.h", - "kernels/ignite_client.cc", "kernels/ignite_plain_client.h", "kernels/ignite_ssl_wrapper.h", "kernels/ignite_ssl_wrapper.cc", diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py index 468920a557..b78829d0f4 100644 --- 
a/tensorflow/contrib/ignite/__init__.py +++ b/tensorflow/contrib/ignite/__init__.py @@ -30,9 +30,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \ -import IgniteDataset - +from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops import IgniteDataset from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc index bf0ef8766e..9bf4480d2d 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -15,290 +15,258 @@ limitations under the License. #include "ignite_binary_object_parser.h" -namespace ignite { +namespace tensorflow { -tensorflow::Status BinaryObjectParser::Parse( - uint8_t*& ptr, std::vector& out_tensors, - std::vector& types) { - uint8_t object_type_id = *ptr; - ptr += 1; +Status BinaryObjectParser::Parse(uint8_t** ptr, + std::vector* out_tensors, + std::vector* types) { + uint8_t object_type_id = **ptr; + *ptr += 1; switch (object_type_id) { case BYTE: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT8, {}); - tensor.scalar()() = *((uint8_t*)ptr); - ptr += 1; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_UINT8, {}); + tensor.scalar()() = *((uint8_t*)*ptr); + *ptr += 1; + out_tensors->push_back(std::move(tensor)); break; } case SHORT: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT16, {}); - tensor.scalar()() = *((int16_t*)ptr); - ptr += 2; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT16, {}); + tensor.scalar()() = *((int16_t*)*ptr); + *ptr += 2; + out_tensors->push_back(std::move(tensor)); break; } case INT: { - 
tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT32, {}); - tensor.scalar()() = *((int32_t*)ptr); - ptr += 4; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT32, {}); + tensor.scalar()() = *((int32_t*)*ptr); + *ptr += 4; + out_tensors->push_back(std::move(tensor)); break; } case LONG: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, {}); - tensor.scalar()() = *((int64_t*)ptr); - ptr += 8; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT64, {}); + tensor.scalar()() = *((int64_t*)*ptr); + *ptr += 8; + out_tensors->push_back(std::move(tensor)); break; } case FLOAT: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_FLOAT, {}); - tensor.scalar()() = *((float*)ptr); - ptr += 4; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_FLOAT, {}); + tensor.scalar()() = *((float*)*ptr); + *ptr += 4; + out_tensors->push_back(std::move(tensor)); break; } case DOUBLE: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_DOUBLE, {}); - tensor.scalar()() = *((double*)ptr); - ptr += 8; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_DOUBLE, {}); + tensor.scalar()() = *((double*)*ptr); + *ptr += 8; + out_tensors->push_back(std::move(tensor)); break; } case UCHAR: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT16, {}); - tensor.scalar()() = *((uint16_t*)ptr); - ptr += 2; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_UINT16, {}); + tensor.scalar()() = *((uint16_t*)*ptr); + *ptr += 2; + out_tensors->push_back(std::move(tensor)); break; } case BOOL: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_BOOL, {}); - tensor.scalar()() = *((bool*)ptr); - ptr += 1; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_BOOL, {}); 
+ tensor.scalar()() = *((bool*)*ptr); + *ptr += 1; + out_tensors->push_back(std::move(tensor)); break; } case STRING: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_STRING, {}); - tensor.scalar()() = std::string((char*)ptr, length); - ptr += length; - out_tensors.emplace_back(std::move(tensor)); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_STRING, {}); + tensor.scalar()() = std::string((char*)*ptr, length); + *ptr += length; + out_tensors->push_back(std::move(tensor)); break; } case DATE: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, {}); - tensor.scalar()() = *((int64_t*)ptr); - ptr += 8; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT64, {}); + tensor.scalar()() = *((int64_t*)*ptr); + *ptr += 8; + out_tensors->push_back(std::move(tensor)); break; } case BYTE_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT8, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length})); - uint8_t* arr = (uint8_t*)ptr; - ptr += length; + uint8_t* arr = (uint8_t*)*ptr; + *ptr += length; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case SHORT_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT16, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length})); - int16_t* arr = (int16_t*)ptr; - ptr += length * 2; + int16_t* arr = (int16_t*)*ptr; + *ptr += length * 2; - std::copy_n(arr, length, 
tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case INT_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT32, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length})); - int32_t* arr = (int32_t*)ptr; - ptr += length * 4; + int32_t* arr = (int32_t*)*ptr; + *ptr += length * 4; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case LONG_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); - int64_t* arr = (int64_t*)ptr; - ptr += length * 8; + int64_t* arr = (int64_t*)*ptr; + *ptr += length * 8; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case FLOAT_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_FLOAT, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length})); - float* arr = (float*)ptr; - ptr += 4 * length; + float* arr = (float*)*ptr; + *ptr += 4 * length; std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case DOUBLE_ARR: { - int32_t length = 
*((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_DOUBLE, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length})); - double* arr = (double*)ptr; - ptr += 8 * length; + double* arr = (double*)*ptr; + *ptr += 8 * length; std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case UCHAR_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT16, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length})); - uint16_t* arr = (uint16_t*)ptr; - ptr += length * 2; + uint16_t* arr = (uint16_t*)*ptr; + *ptr += length * 2; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case BOOL_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_BOOL, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length})); - bool* arr = (bool*)ptr; - ptr += length; + bool* arr = (bool*)*ptr; + *ptr += length; std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case STRING_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_STRING, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length})); for (int32_t 
i = 0; i < length; i++) { - int32_t str_length = *((int32_t*)ptr); - ptr += 4; - const int8_t* str = (const int8_t*)ptr; - ptr += str_length; + int32_t str_length = *((int32_t*)*ptr); + *ptr += 4; + const int8_t* str = (const int8_t*)*ptr; + *ptr += str_length; tensor.vec()(i) = std::string((char*)str, str_length); } - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case DATE_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, - tensorflow::TensorShape({length})); - int64_t* arr = (int64_t*)ptr; - ptr += length * 8; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); + int64_t* arr = (int64_t*)*ptr; + *ptr += length * 8; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case WRAPPED_OBJ: { - int32_t byte_arr_size = *((int32_t*)ptr); - ptr += 4; + int32_t byte_arr_size = *((int32_t*)*ptr); + *ptr += 4; - tensorflow::Status status = Parse(ptr, out_tensors, types); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); - int32_t offset = *((int32_t*)ptr); - ptr += 4; + int32_t offset = *((int32_t*)*ptr); + *ptr += 4; break; } case COMPLEX_OBJ: { - uint8_t version = *ptr; - ptr += 1; - int16_t flags = *((int16_t*)ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 - ptr += 2; - int32_t type_id = *((int32_t*)ptr); - ptr += 4; - int32_t hash_code = *((int32_t*)ptr); - ptr += 4; - int32_t length = *((int32_t*)ptr); - ptr += 4; - int32_t schema_id = *((int32_t*)ptr); - ptr += 4; - int32_t schema_offset = *((int32_t*)ptr); - ptr += 4; - - uint8_t* end = ptr + schema_offset - 24; + uint8_t version = **ptr; + *ptr += 1; + int16_t flags = *((int16_t*)*ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 + *ptr += 2; + 
int32_t type_id = *((int32_t*)*ptr); + *ptr += 4; + int32_t hash_code = *((int32_t*)*ptr); + *ptr += 4; + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + int32_t schema_id = *((int32_t*)*ptr); + *ptr += 4; + int32_t schema_offset = *((int32_t*)*ptr); + *ptr += 4; + + uint8_t* end = *ptr + schema_offset - 24; int32_t i = 0; - while (ptr < end) { + while (*ptr < end) { i++; - tensorflow::Status status = Parse(ptr, out_tensors, types); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); } - ptr += (length - schema_offset); + *ptr += (length - schema_offset); break; } default: { - return tensorflow::errors::Internal("Unknowd binary type (type id ", - (int)object_type_id, ")"); + return errors::Internal("Unknowd binary type (type id ", + (int)object_type_id, ")"); } } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h index 1e845cbc56..9accbd796f 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -17,13 +17,12 @@ limitations under the License. 
#include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/lib/core/status.h" -namespace ignite { +namespace tensorflow { class BinaryObjectParser { public: - tensorflow::Status Parse(uint8_t*& ptr, - std::vector& out_tensors, - std::vector& types); + Status Parse(uint8_t** ptr, std::vector* out_tensors, + std::vector* types); }; enum ObjectType { @@ -51,4 +50,4 @@ enum ObjectType { COMPLEX_OBJ = 103 }; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc deleted file mode 100644 index 5a8eddb944..0000000000 --- a/tensorflow/contrib/ignite/kernels/ignite_client.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H -#include "ignite_client.h" -#endif - -namespace ignite { - -tensorflow::Status Client::ReadByte(uint8_t& data) { - return ReadData((uint8_t*)&data, 1); -} - -tensorflow::Status Client::ReadShort(int16_t& data) { - return ReadData((uint8_t*)&data, 2); -} - -tensorflow::Status Client::ReadInt(int32_t& data) { - return ReadData((uint8_t*)&data, 4); -} - -tensorflow::Status Client::ReadLong(int64_t& data) { - return ReadData((uint8_t*)&data, 8); -} - -tensorflow::Status Client::WriteByte(uint8_t data) { - return WriteData((uint8_t*)&data, 1); -} - -tensorflow::Status Client::WriteShort(int16_t data) { - return WriteData((uint8_t*)&data, 2); -} - -tensorflow::Status Client::WriteInt(int32_t data) { - return WriteData((uint8_t*)&data, 4); -} - -tensorflow::Status Client::WriteLong(int64_t data) { - return WriteData((uint8_t*)&data, 8); -} - -} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h index 64e28d75f0..944b3fe184 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -13,28 +13,43 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ + #include "tensorflow/core/lib/core/status.h" -namespace ignite { +namespace tensorflow { class Client { public: - virtual tensorflow::Status Connect() = 0; - virtual tensorflow::Status Disconnect() = 0; + virtual Status Connect() = 0; + virtual Status Disconnect() = 0; virtual bool IsConnected() = 0; virtual int GetSocketDescriptor() = 0; + virtual Status ReadData(uint8_t* buf, int32_t length) = 0; + virtual Status WriteData(uint8_t* buf, int32_t length) = 0; + + inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); } + + inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); } + + inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); } + + inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); } - virtual tensorflow::Status ReadByte(uint8_t& data); - virtual tensorflow::Status ReadShort(int16_t& data); - virtual tensorflow::Status ReadInt(int32_t& data); - virtual tensorflow::Status ReadLong(int64_t& data); - virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0; - - virtual tensorflow::Status WriteByte(uint8_t data); - virtual tensorflow::Status WriteShort(int16_t data); - virtual tensorflow::Status WriteInt(int32_t data); - virtual tensorflow::Status WriteLong(int64_t data); - virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0; + inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); } + + inline Status WriteShort(int16_t data) { + return WriteData((uint8_t*)&data, 2); + } + + inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); } + + inline Status WriteLong(int64_t data) { + return WriteData((uint8_t*)&data, 8); + } }; -} // namespace ignite +} // namespace tensorflow + +#endif diff --git 
a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc index a9bf26955b..f25f8a5b18 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -16,31 +16,29 @@ limitations under the License. #include "ignite_dataset_iterator.h" #include "tensorflow/core/platform/logging.h" -namespace ignite { +namespace tensorflow { -IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx, - std::string cache_name, std::string host, - tensorflow::int32 port, bool local, - tensorflow::int32 part, - tensorflow::int32 page_size, std::string username, +IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name, + std::string host, int32 port, bool local, + int32 part, int32 page_size, std::string username, std::string password, std::string certfile, std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation) - : DatasetBase(tensorflow::DatasetContext(ctx)), - cache_name(cache_name), - host(host), - port(port), - local(local), - part(part), - page_size(page_size), - username(username), - password(password), - certfile(certfile), - keyfile(keyfile), - cert_password(cert_password), - schema(schema), - permutation(permutation) { + std::vector schema, + std::vector permutation) + : DatasetBase(DatasetContext(ctx)), + cache_name_(cache_name), + host_(host), + port_(port), + local_(local), + part_(part), + page_size_(page_size), + username_(username), + password_(password), + certfile_(certfile), + keyfile_(keyfile), + cert_password_(cert_password), + schema_(schema), + permutation_(permutation) { SchemaToTypes(); SchemaToShapes(); @@ -53,55 +51,50 @@ IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx, IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } -std::unique_ptr IgniteDataset::MakeIteratorInternal( - const tensorflow::string& prefix) const { - return std::unique_ptr(new 
IgniteDatasetIterator( - {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host, - this->port, this->cache_name, this->local, this->part, this->page_size, - this->username, this->password, this->certfile, this->keyfile, - this->cert_password, this->schema, this->permutation)); +std::unique_ptr IgniteDataset::MakeIteratorInternal( + const string& prefix) const { + return std::unique_ptr(new IgniteDatasetIterator( + {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_, + this->cache_name_, this->local_, this->part_, this->page_size_, + this->username_, this->password_, this->certfile_, this->keyfile_, + this->cert_password_, this->schema_, this->permutation_)); } -const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const { - return dtypes; -} +const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; } -const std::vector& -IgniteDataset::output_shapes() const { - return shapes; +const std::vector& IgniteDataset::output_shapes() const { + return shapes_; } -tensorflow::string IgniteDataset::DebugString() const { - return "IgniteDatasetOp::Dataset"; -} +string IgniteDataset::DebugString() const { return "IgniteDatasetOp::Dataset"; } -tensorflow::Status IgniteDataset::AsGraphDefInternal( - tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, - tensorflow::Node** output) const { - return tensorflow::errors::Unimplemented( +Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const { + return errors::Unimplemented( "IgniteDataset does not support 'AsGraphDefInternal'"); } void IgniteDataset::SchemaToTypes() { - for (auto e : schema) { + for (auto e : schema_) { if (e == BYTE || e == BYTE_ARR) { - dtypes.push_back(tensorflow::DT_UINT8); + dtypes_.push_back(DT_UINT8); } else if (e == SHORT || e == SHORT_ARR) { - dtypes.push_back(tensorflow::DT_INT16); + dtypes_.push_back(DT_INT16); } else if (e == INT || e == INT_ARR) { - 
dtypes.push_back(tensorflow::DT_INT32); + dtypes_.push_back(DT_INT32); } else if (e == LONG || e == LONG_ARR) { - dtypes.push_back(tensorflow::DT_INT64); + dtypes_.push_back(DT_INT64); } else if (e == FLOAT || e == FLOAT_ARR) { - dtypes.push_back(tensorflow::DT_FLOAT); + dtypes_.push_back(DT_FLOAT); } else if (e == DOUBLE || e == DOUBLE_ARR) { - dtypes.push_back(tensorflow::DT_DOUBLE); + dtypes_.push_back(DT_DOUBLE); } else if (e == UCHAR || e == UCHAR_ARR) { - dtypes.push_back(tensorflow::DT_UINT8); + dtypes_.push_back(DT_UINT8); } else if (e == BOOL || e == BOOL_ARR) { - dtypes.push_back(tensorflow::DT_BOOL); + dtypes_.push_back(DT_BOOL); } else if (e == STRING || e == STRING_ARR) { - dtypes.push_back(tensorflow::DT_STRING); + dtypes_.push_back(DT_STRING); } else { LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; } @@ -109,15 +102,15 @@ void IgniteDataset::SchemaToTypes() { } void IgniteDataset::SchemaToShapes() { - for (auto e : schema) { + for (auto e : schema_) { if (e >= 1 && e < 10) { - shapes.push_back(tensorflow::PartialTensorShape({})); + shapes_.push_back(PartialTensorShape({})); } else if (e >= 12 && e < 21) { - shapes.push_back(tensorflow::PartialTensorShape({-1})); + shapes_.push_back(PartialTensorShape({-1})); } else { LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; } } } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h index 2120dfd342..d3fec5910b 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -15,51 +15,48 @@ limitations under the License. 
#include "tensorflow/core/framework/dataset.h" -namespace ignite { +namespace tensorflow { -class IgniteDataset : public tensorflow::DatasetBase { +class IgniteDataset : public DatasetBase { public: - IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name, - std::string host, tensorflow::int32 port, bool local, - tensorflow::int32 part, tensorflow::int32 page_size, + IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host, + int32 port, bool local, int32 part, int32 page_size, std::string username, std::string password, std::string certfile, std::string keyfile, - std::string cert_password, - std::vector schema, - std::vector permutation); + std::string cert_password, std::vector schema, + std::vector permutation); ~IgniteDataset(); - std::unique_ptr MakeIteratorInternal( - const tensorflow::string& prefix) const override; - const tensorflow::DataTypeVector& output_dtypes() const override; - const std::vector& output_shapes() - const override; - tensorflow::string DebugString() const override; + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override; + const DataTypeVector& output_dtypes() const override; + const std::vector& output_shapes() const override; + string DebugString() const override; protected: - tensorflow::Status AsGraphDefInternal( - tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, - tensorflow::Node** output) const override; + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override; private: - const std::string cache_name; - const std::string host; - const tensorflow::int32 port; - const bool local; - const tensorflow::int32 part; - const tensorflow::int32 page_size; - const std::string username; - const std::string password; - const std::string certfile; - const std::string keyfile; - const std::string cert_password; - const std::vector schema; - const std::vector permutation; - - tensorflow::DataTypeVector dtypes; - 
std::vector shapes; + const std::string cache_name_; + const std::string host_; + const int32 port_; + const bool local_; + const int32 part_; + const int32 page_size_; + const std::string username_; + const std::string password_; + const std::string certfile_; + const std::string keyfile_; + const std::string cert_password_; + const std::vector schema_; + const std::vector permutation_; + + DataTypeVector dtypes_; + std::vector shapes_; void SchemaToTypes(); void SchemaToShapes(); }; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc index 03cc3c1291..1774585ecd 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -22,270 +22,262 @@ limitations under the License. #include #include -namespace ignite { - -#define CHECK_STATUS(status) \ - if (!status.ok()) return status; +namespace tensorflow { IgniteDatasetIterator::IgniteDatasetIterator( - const Params& params, std::string host, tensorflow::int32 port, - std::string cache_name, bool local, tensorflow::int32 part, - tensorflow::int32 page_size, std::string username, std::string password, - std::string certfile, std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation) - : tensorflow::DatasetIterator(params), - cache_name(cache_name), - local(local), - part(part), - page_size(page_size), - username(username), - password(password), - schema(schema), - permutation(permutation), - remainder(-1), - cursor_id(-1), - last_page(false) { + const Params& params, std::string host, int32 port, std::string cache_name, + bool local, int32 part, int32 page_size, std::string username, + std::string password, std::string certfile, std::string keyfile, + std::string cert_password, std::vector schema, + std::vector permutation) + : DatasetIterator(params), + 
cache_name_(cache_name), + local_(local), + part_(part), + page_size_(page_size), + username_(username), + password_(password), + schema_(schema), + permutation_(permutation), + remainder_(-1), + cursor_id_(-1), + last_page_(false) { Client* p_client = new PlainClient(host, port); if (certfile.empty()) - client = std::unique_ptr(p_client); + client_ = std::unique_ptr(p_client); else - client = std::unique_ptr(new SslWrapper( + client_ = std::unique_ptr(new SslWrapper( std::unique_ptr(p_client), certfile, keyfile, cert_password)); LOG(INFO) << "Ignite Dataset Iterator created"; } IgniteDatasetIterator::~IgniteDatasetIterator() { - tensorflow::Status status = CloseConnection(); + Status status = CloseConnection(); if (!status.ok()) LOG(ERROR) << status.ToString(); LOG(INFO) << "Ignite Dataset Iterator destroyed"; } -tensorflow::Status IgniteDatasetIterator::EstablishConnection() { - if (!client->IsConnected()) { - tensorflow::Status status = client->Connect(); +Status IgniteDatasetIterator::EstablishConnection() { + if (!client_->IsConnected()) { + Status status = client_->Connect(); if (!status.ok()) return status; status = Handshake(); if (!status.ok()) { - tensorflow::Status disconnect_status = client->Disconnect(); + Status disconnect_status = client_->Disconnect(); if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); return status; } } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::CloseConnection() { - if (cursor_id != -1 && !last_page) { - tensorflow::Status conn_status = EstablishConnection(); +Status IgniteDatasetIterator::CloseConnection() { + if (cursor_id_ != -1 && !last_page_) { + Status conn_status = EstablishConnection(); if (!conn_status.ok()) return conn_status; - CHECK_STATUS(client->WriteInt(18)); // Message length - CHECK_STATUS( - client->WriteShort(close_connection_opcode)); // Operation code - CHECK_STATUS(client->WriteLong(0)); // Request ID - 
CHECK_STATUS(client->WriteLong(cursor_id)); // Resource ID + TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length + TF_RETURN_IF_ERROR( + client_->WriteShort(close_connection_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Resource ID int32_t res_len; - CHECK_STATUS(client->ReadInt(res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); if (res_len < 12) - return tensorflow::errors::Internal( - "Close Resource Response is corrupted"); + return errors::Internal("Close Resource Response is corrupted"); int64_t req_id; - CHECK_STATUS(client->ReadLong(req_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); int32_t status; - CHECK_STATUS(client->ReadInt(status)); + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); if (status != 0) { uint8_t err_msg_header; - CHECK_STATUS(client->ReadByte(err_msg_header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); if (err_msg_header == string_val) { int32_t err_msg_length; - CHECK_STATUS(client->ReadInt(err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; - CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); std::string err_msg((char*)err_msg_c, err_msg_length); delete[] err_msg_c; - return tensorflow::errors::Internal("Close Resource Error [status=", - status, ", message=", err_msg, "]"); + return errors::Internal("Close Resource Error [status=", status, + ", message=", err_msg, "]"); } - return tensorflow::errors::Internal("Close Resource Error [status=", - status, "]"); + return errors::Internal("Close Resource Error [status=", status, "]"); } - LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; + LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - cursor_id = -1; + cursor_id_ = -1; - return client->Disconnect(); + return client_->Disconnect(); } 
else { - LOG(INFO) << "Query Cursor " << cursor_id << " is already closed"; + LOG(INFO) << "Query Cursor " << cursor_id_ << " is already closed"; } - return client->IsConnected() ? client->Disconnect() - : tensorflow::Status::OK(); + return client_->IsConnected() ? client_->Disconnect() : Status::OK(); } -tensorflow::Status IgniteDatasetIterator::GetNextInternal( - tensorflow::IteratorContext* ctx, - std::vector* out_tensors, bool* end_of_sequence) { - if (remainder == 0 && last_page) { - LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; +Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) { + if (remainder_ == 0 && last_page_) { + LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - cursor_id = -1; + cursor_id_ = -1; *end_of_sequence = true; - return tensorflow::Status::OK(); + return Status::OK(); } else { - tensorflow::Status status = EstablishConnection(); + Status status = EstablishConnection(); if (!status.ok()) return status; - if (remainder == -1 || remainder == 0) { - tensorflow::Status status = - remainder == -1 ? ScanQuery() : LoadNextPage(); + if (remainder_ == -1 || remainder_ == 0) { + Status status = remainder_ == -1 ? 
ScanQuery() : LoadNextPage(); if (!status.ok()) return status; } - uint8_t* initial_ptr = ptr; + uint8_t* initial_ptr = ptr_; std::vector types; - std::vector tensors; + std::vector tensors; - status = parser.Parse(ptr, tensors, types); // Parse key + status = parser_.Parse(&ptr_, &tensors, &types); // Parse key if (!status.ok()) return status; - status = parser.Parse(ptr, tensors, types); // Parse val + status = parser_.Parse(&ptr_, &tensors, &types); // Parse val if (!status.ok()) return status; - remainder -= (ptr - initial_ptr); + remainder_ -= (ptr_ - initial_ptr); out_tensors->resize(tensors.size()); for (int32_t i = 0; i < tensors.size(); i++) - (*out_tensors)[permutation[i]] = std::move(tensors[i]); + (*out_tensors)[permutation_[i]] = std::move(tensors[i]); *end_of_sequence = false; - return tensorflow::Status::OK(); + return Status::OK(); } *end_of_sequence = true; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::SaveInternal( - tensorflow::IteratorStateWriter* writer) { - return tensorflow::errors::Unimplemented( +Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { + return errors::Unimplemented( "Iterator for IgniteDataset does not support 'SaveInternal'"); } -tensorflow::Status IgniteDatasetIterator::RestoreInternal( - tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) { - return tensorflow::errors::Unimplemented( +Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) { + return errors::Unimplemented( "Iterator for IgniteDataset does not support 'RestoreInternal')"); } -tensorflow::Status IgniteDatasetIterator::Handshake() { +Status IgniteDatasetIterator::Handshake() { int32_t msg_len = 8; - if (username.empty()) + if (username_.empty()) msg_len += 1; else - msg_len += 5 + username.length(); + msg_len += 5 + username_.length(); - if (password.empty()) + if (password_.empty()) msg_len += 1; else - msg_len += 5 + 
password.length(); - - CHECK_STATUS(client->WriteInt(msg_len)); - CHECK_STATUS(client->WriteByte(1)); - CHECK_STATUS(client->WriteShort(protocol_major_version)); - CHECK_STATUS(client->WriteShort(protocol_minor_version)); - CHECK_STATUS(client->WriteShort(protocol_patch_version)); - CHECK_STATUS(client->WriteByte(2)); - if (username.empty()) { - CHECK_STATUS(client->WriteByte(null_val)); + msg_len += 5 + password_.length(); + + TF_RETURN_IF_ERROR(client_->WriteInt(msg_len)); + TF_RETURN_IF_ERROR(client_->WriteByte(1)); + TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version)); + TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version)); + TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version)); + TF_RETURN_IF_ERROR(client_->WriteByte(2)); + if (username_.empty()) { + TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); } else { - CHECK_STATUS(client->WriteByte(string_val)); - CHECK_STATUS(client->WriteInt(username.length())); - CHECK_STATUS( - client->WriteData((uint8_t*)username.c_str(), username.length())); + TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteInt(username_.length())); + TF_RETURN_IF_ERROR( + client_->WriteData((uint8_t*)username_.c_str(), username_.length())); } - if (password.empty()) { - CHECK_STATUS(client->WriteByte(null_val)); + if (password_.empty()) { + TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); } else { - CHECK_STATUS(client->WriteByte(string_val)); - CHECK_STATUS(client->WriteInt(password.length())); - CHECK_STATUS( - client->WriteData((uint8_t*)password.c_str(), password.length())); + TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteInt(password_.length())); + TF_RETURN_IF_ERROR( + client_->WriteData((uint8_t*)password_.c_str(), password_.length())); } int32_t handshake_res_len; - CHECK_STATUS(client->ReadInt(handshake_res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&handshake_res_len)); uint8_t handshake_res; - 
CHECK_STATUS(client->ReadByte(handshake_res)); + TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res)); LOG(INFO) << "Handshake length " << handshake_res_len << ", res " << (int16_t)handshake_res; if (handshake_res != 1) { int16_t serv_ver_major; - CHECK_STATUS(client->ReadShort(serv_ver_major)); + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major)); int16_t serv_ver_minor; - CHECK_STATUS(client->ReadShort(serv_ver_minor)); + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_minor)); int16_t serv_ver_patch; - CHECK_STATUS(client->ReadShort(serv_ver_patch)); + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_patch)); uint8_t header; - CHECK_STATUS(client->ReadByte(header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&header)); if (header == string_val) { int32_t length; - CHECK_STATUS(client->ReadInt(length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&length)); uint8_t* err_msg_c = new uint8_t[length]; - CHECK_STATUS(client->ReadData(err_msg_c, length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length)); std::string err_msg((char*)err_msg_c, length); delete[] err_msg_c; - return tensorflow::errors::Internal( - "Handshake Error [result=", handshake_res, ", version=", - serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, - ", message='", err_msg, "']"); + return errors::Internal("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, ", message='", err_msg, + "']"); } else if (header == null_val) { - return tensorflow::errors::Internal( - "Handshake Error [result=", handshake_res, ", version=", - serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + return errors::Internal("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } else { - return tensorflow::errors::Internal( - "Handshake Error [result=", handshake_res, ", version=", - serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + return 
errors::Internal("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::ScanQuery() { - CHECK_STATUS(client->WriteInt(25)); // Message length - CHECK_STATUS(client->WriteShort(scan_query_opcode)); // Operation code - CHECK_STATUS(client->WriteLong(0)); // Request ID - CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name))); // Cache name - CHECK_STATUS(client->WriteByte(0)); // Flags - CHECK_STATUS(client->WriteByte(null_val)); // Filter object - CHECK_STATUS(client->WriteInt(page_size)); // Cursor page size - CHECK_STATUS(client->WriteInt(part)); // Partition to query - CHECK_STATUS(client->WriteByte(local)); // Local flag +Status IgniteDatasetIterator::ScanQuery() { + TF_RETURN_IF_ERROR(client_->WriteInt(25)); // Message length + TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR( + client_->WriteInt(JavaHashCode(cache_name_))); // Cache name + TF_RETURN_IF_ERROR(client_->WriteByte(0)); // Flags + TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); // Filter object + TF_RETURN_IF_ERROR(client_->WriteInt(page_size_)); // Cursor page size + TF_RETURN_IF_ERROR(client_->WriteInt(part_)); // part_ition to query + TF_RETURN_IF_ERROR(client_->WriteByte(local_)); // local_ flag int64_t wait_start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); int32_t res_len; - CHECK_STATUS(client->ReadInt(res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); int64_t wait_stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) @@ -293,82 +285,81 @@ tensorflow::Status IgniteDatasetIterator::ScanQuery() { LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms"; - if (res_len < 12) - return 
tensorflow::errors::Internal("Scan Query Response is corrupted"); + if (res_len < 12) return errors::Internal("Scan Query Response is corrupted"); int64_t req_id; - CHECK_STATUS(client->ReadLong(req_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); int32_t status; - CHECK_STATUS(client->ReadInt(status)); + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); if (status != 0) { uint8_t err_msg_header; - CHECK_STATUS(client->ReadByte(err_msg_header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); if (err_msg_header == string_val) { int32_t err_msg_length; - CHECK_STATUS(client->ReadInt(err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; - CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); std::string err_msg((char*)err_msg_c, err_msg_length); delete[] err_msg_c; - return tensorflow::errors::Internal("Scan Query Error [status=", status, - ", message=", err_msg, "]"); + return errors::Internal("Scan Query Error [status=", status, ", message=", + err_msg, "]"); } - return tensorflow::errors::Internal("Scan Query Error [status=", status, - "]"); + return errors::Internal("Scan Query Error [status=", status, "]"); } - CHECK_STATUS(client->ReadLong(cursor_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_)); - LOG(INFO) << "Query Cursor " << cursor_id << " is opened"; + LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened"; int32_t row_cnt; - CHECK_STATUS(client->ReadInt(row_cnt)); + TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder = res_len - 25; - page = std::unique_ptr(new uint8_t[remainder]); - ptr = page.get(); + remainder_ = res_len - 25; + page_ = std::unique_ptr(new uint8_t[remainder_]); + ptr_ = page_.get(); int64_t start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); - CHECK_STATUS(client->ReadData(ptr, remainder)); + 
TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); int64_t stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); ; - double size_in_mb = 1.0 * remainder / 1024 / 1024; + double size_in_mb = 1.0 * remainder_ / 1024 / 1024; double time_in_s = 1.0 * (stop - start) / 1000; LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; uint8_t last_page_b; - CHECK_STATUS(client->ReadByte(last_page_b)); + TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); - last_page = !last_page_b; + last_page_ = !last_page_b; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::LoadNextPage() { - CHECK_STATUS(client->WriteInt(18)); // Message length - CHECK_STATUS(client->WriteShort(load_next_page_opcode)); // Operation code - CHECK_STATUS(client->WriteLong(0)); // Request ID - CHECK_STATUS(client->WriteLong(cursor_id)); // Cursor ID +Status IgniteDatasetIterator::LoadNextPage() { + TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length + TF_RETURN_IF_ERROR( + client_->WriteShort(load_next_page_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Cursor ID int64_t wait_start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); int32_t res_len; - CHECK_STATUS(client->ReadInt(res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); int64_t wait_stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) @@ -377,66 +368,65 @@ tensorflow::Status IgniteDatasetIterator::LoadNextPage() { LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms"; if (res_len < 12) - return tensorflow::errors::Internal("Load Next Page Response is corrupted"); + return errors::Internal("Load Next Page Response is corrupted"); int64_t req_id; - 
CHECK_STATUS(client->ReadLong(req_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); int32_t status; - CHECK_STATUS(client->ReadInt(status)); + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); if (status != 0) { uint8_t err_msg_header; - CHECK_STATUS(client->ReadByte(err_msg_header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); if (err_msg_header == string_val) { int32_t err_msg_length; - CHECK_STATUS(client->ReadInt(err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; - CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); std::string err_msg((char*)err_msg_c, err_msg_length); delete[] err_msg_c; - return tensorflow::errors::Internal("Load Next Page Error [status=", - status, ", message=", err_msg, "]"); + return errors::Internal("Load Next Page Error [status=", status, + ", message=", err_msg, "]"); } - return tensorflow::errors::Internal("Load Next Page Error [status=", status, - "]"); + return errors::Internal("Load Next Page Error [status=", status, "]"); } int32_t row_cnt; - CHECK_STATUS(client->ReadInt(row_cnt)); + TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder = res_len - 17; - page = std::unique_ptr(new uint8_t[remainder]); - ptr = page.get(); + remainder_ = res_len - 17; + page_ = std::unique_ptr(new uint8_t[remainder_]); + ptr_ = page_.get(); int64_t start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); - CHECK_STATUS(client->ReadData(ptr, remainder)); + TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); int64_t stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); ; - double size_in_mb = 1.0 * remainder / 1024 / 1024; + double size_in_mb = 1.0 * remainder_ / 1024 / 1024; double time_in_s = 1.0 * (stop - start) / 1000; LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 
1000 << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; uint8_t last_page_b; - CHECK_STATUS(client->ReadByte(last_page_b)); + TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); - last_page = !last_page_b; + last_page_ = !last_page_b; - return tensorflow::Status::OK(); + return Status::OK(); } -int32_t IgniteDatasetIterator::JavaHashCode(std::string str) { +int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const { int32_t h = 0; for (char& c : str) { h = 31 * h + c; @@ -444,4 +434,4 @@ int32_t IgniteDatasetIterator::JavaHashCode(std::string str) { return h; } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h index d1df4527f9..5858dbfcb9 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -14,65 +14,55 @@ limitations under the License. ==============================================================================*/ #include "ignite_binary_object_parser.h" -#include "ignite_dataset.h" - -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H #include "ignite_client.h" -#endif +#include "ignite_dataset.h" -namespace ignite { +namespace tensorflow { -class IgniteDatasetIterator - : public tensorflow::DatasetIterator { +class IgniteDatasetIterator : public DatasetIterator { public: - IgniteDatasetIterator(const Params& params, std::string host, - tensorflow::int32 port, std::string cache_name, - bool local, tensorflow::int32 part, - tensorflow::int32 page_size, std::string username, + IgniteDatasetIterator(const Params& params, std::string host, int32 port, + std::string cache_name, bool local, int32 part, + int32 page_size, std::string username, std::string password, std::string certfile, std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation); + std::vector schema, + std::vector 
permutation); ~IgniteDatasetIterator(); - tensorflow::Status GetNextInternal( - tensorflow::IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override; + Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, + bool* end_of_sequence) override; protected: - tensorflow::Status SaveInternal( - tensorflow::IteratorStateWriter* writer) override; - tensorflow::Status RestoreInternal( - tensorflow::IteratorContext* ctx, - tensorflow::IteratorStateReader* reader) override; + Status SaveInternal(IteratorStateWriter* writer) override; + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override; private: - std::unique_ptr client; - BinaryObjectParser parser; + std::unique_ptr client_; + BinaryObjectParser parser_; - const std::string cache_name; - const bool local; - const tensorflow::int32 part; - const tensorflow::int32 page_size; - const std::string username; - const std::string password; - const std::vector schema; - const std::vector permutation; + const std::string cache_name_; + const bool local_; + const int32 part_; + const int32 page_size_; + const std::string username_; + const std::string password_; + const std::vector schema_; + const std::vector permutation_; - int32_t remainder; - int64_t cursor_id; - bool last_page; + int32_t remainder_; + int64_t cursor_id_; + bool last_page_; - std::unique_ptr page; - uint8_t* ptr; + std::unique_ptr page_; + uint8_t* ptr_; - tensorflow::Status EstablishConnection(); - tensorflow::Status CloseConnection(); - tensorflow::Status Handshake(); - tensorflow::Status ScanQuery(); - tensorflow::Status LoadNextPage(); - int32_t JavaHashCode(std::string str); + Status EstablishConnection(); + Status CloseConnection(); + Status Handshake(); + Status ScanQuery(); + Status LoadNextPage(); + int32_t JavaHashCode(std::string str) const; }; constexpr uint8_t null_val = 101; @@ -84,4 +74,4 @@ constexpr int16_t scan_query_opcode = 2000; constexpr int16_t load_next_page_opcode 
= 2001; constexpr int16_t close_connection_opcode = 0; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index 543b5e4afc..89eecf9c14 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/dataset.h" namespace tensorflow { +namespace { class IgniteDatasetOp : public DatasetOpKernel { public: @@ -132,14 +133,15 @@ class IgniteDatasetOp : public DatasetOpKernel { permutation.push_back(permutation_tensor->flat()(i)); } - *output = new ignite::IgniteDataset( - ctx, cache_name, host, port, local, part, page_size, username, password, - certfile, keyfile, cert_password, std::move(schema), - std::move(permutation)); + *output = + new IgniteDataset(ctx, cache_name, host, port, local, part, page_size, + username, password, certfile, keyfile, cert_password, + std::move(schema), std::move(permutation)); } }; REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU), IgniteDatasetOp); +} // namespace } // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h index 5491af68d6..6f417a3cb5 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -13,31 +13,28 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H #include "ignite_client.h" -#endif #include -namespace ignite { +namespace tensorflow { class PlainClient : public Client { public: PlainClient(std::string host, int port); ~PlainClient(); - virtual tensorflow::Status Connect(); - virtual tensorflow::Status Disconnect(); + virtual Status Connect(); + virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); - virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, int32_t length); + virtual Status WriteData(uint8_t* buf, int32_t length); private: - std::string host; - int port; - int sock; + const std::string host_; + const int port_; + int sock_; }; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc index dbfa4f8786..a4c58a9563 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -29,104 +29,98 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" -namespace ignite { +namespace tensorflow { PlainClient::PlainClient(std::string host, int port) - : host(host), port(port), sock(-1) {} + : host_(host), port_(port), sock_(-1) {} PlainClient::~PlainClient() { if (IsConnected()) { - tensorflow::Status status = Disconnect(); + Status status = Disconnect(); if (!status.ok()) LOG(WARNING) << status.ToString(); } } -tensorflow::Status PlainClient::Connect() { - if (sock == -1) { - sock = socket(AF_INET, SOCK_STREAM, 0); - if (sock == -1) - return tensorflow::errors::Internal("Failed to create socket"); +Status PlainClient::Connect() { + if (sock_ == -1) { + sock_ = socket(AF_INET, SOCK_STREAM, 0); + if (sock_ == -1) return errors::Internal("Failed to create socket"); } sockaddr_in server; - server.sin_addr.s_addr = inet_addr(host.c_str()); + server.sin_addr.s_addr = inet_addr(host_.c_str()); if (server.sin_addr.s_addr == -1) { hostent* he; in_addr** addr_list; - if ((he = gethostbyname(host.c_str())) == NULL) - return tensorflow::errors::Internal("Failed to resolve hostname \"", host, - "\""); + if ((he = gethostbyname(host_.c_str())) == NULL) + return errors::Internal("Failed to resolve hostname \"", host_, "\""); addr_list = (in_addr**)he->h_addr_list; if (addr_list[0] != NULL) server.sin_addr = *addr_list[0]; } server.sin_family = AF_INET; - server.sin_port = htons(port); + server.sin_port = htons(port_); - if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0) - return tensorflow::errors::Internal("Failed to connect to \"", host, ":", - port, "\""); + if (connect(sock_, (sockaddr*)&server, sizeof(server)) < 0) + return errors::Internal("Failed to connect to \"", host_, ":", port_, "\""); - LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established"; + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established"; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status 
PlainClient::Disconnect() { - int close_res = close(sock); - sock = -1; +Status PlainClient::Disconnect() { + int close_res = close(sock_); + sock_ = -1; - LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed"; + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" is closed"; - return close_res == 0 ? tensorflow::Status::OK() - : tensorflow::errors::Internal( - "Failed to correctly close connection"); + return close_res == 0 + ? Status::OK() + : errors::Internal("Failed to correctly close connection"); } -bool PlainClient::IsConnected() { return sock != -1; } +bool PlainClient::IsConnected() { return sock_ != -1; } -int PlainClient::GetSocketDescriptor() { return sock; } +int PlainClient::GetSocketDescriptor() { return sock_; } -tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) { +Status PlainClient::ReadData(uint8_t* buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = recv(sock, buf, length - recieved, 0); + int res = recv(sock_, buf, length - recieved, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while reading from socket: ", res, ", ", - std::string(strerror(errno))); + return errors::Internal("Error occured while reading from socket: ", res, + ", ", std::string(strerror(errno))); - if (res == 0) - return tensorflow::errors::Internal("Server closed connection"); + if (res == 0) return errors::Internal("Server closed connection"); recieved += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) { +Status PlainClient::WriteData(uint8_t* buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock, buf, length - sent, 0); + int res = send(sock_, buf, length - sent, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while writing into socket: ", res, ", ", - std::string(strerror(errno))); + return 
errors::Internal("Error occured while writing into socket: ", res, + ", ", std::string(strerror(errno))); sent += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index f78c9b3627..7ba037f2d2 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -27,48 +27,45 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" -namespace ignite { +namespace tensorflow { PlainClient::PlainClient(std::string host, int port) - : host(host), port(port), sock(INVALID_SOCKET) {} + : host_(host), port_(port), sock_(INVALID_SOCKET) {} PlainClient::~PlainClient() { if (IsConnected()) { - tensorflow::Status status = Disconnect(); + Status status = Disconnect(); if (!status.ok()) LOG(WARNING) << status.ToString(); } } -tensorflow::Status PlainClient::Connect() { +Status PlainClient::Connect() { WSADATA wsaData; addrinfo *result = NULL, *ptr = NULL, hints; int res = WSAStartup(MAKEWORD(2, 2), &wsaData); - if (res != 0) - return tensorflow::errors::Internal("WSAStartup failed with error: ", res); + if (res != 0) return errors::Internal("WSAStartup failed with error: ", res); ZeroMemory(&hints, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; - res = - getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result); - if (res != 0) - return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res); + res = getaddrinfo(host_.c_str(), std::to_string(port_).c_str(), &hints, + &result); + if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { - sock = 
socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); - if (sock == INVALID_SOCKET) { + sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); + if (sock_ == INVALID_SOCKET) { WSACleanup(); - return tensorflow::errors::Internal("Socket failed with error: ", - WSAGetLastError()); + return errors::Internal("Socket failed with error: ", WSAGetLastError()); } - res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen); + res = connect(sock_, ptr->ai_addr, (int)ptr->ai_addrlen); if (res == SOCKET_ERROR) { - closesocket(sock); - sock = INVALID_SOCKET; + closesocket(sock_); + sock_ = INVALID_SOCKET; continue; } @@ -77,67 +74,63 @@ tensorflow::Status PlainClient::Connect() { freeaddrinfo(result); - if (sock == INVALID_SOCKET) { + if (sock_ == INVALID_SOCKET) { WSACleanup(); - return tensorflow::errors::Internal("Unable to connect to server"); + return errors::Internal("Unable to connect to server"); } - LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established"; + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established"; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status PlainClient::Disconnect() { - int res = shutdown(sock, SD_SEND); - closesocket(sock); +Status PlainClient::Disconnect() { + int res = shutdown(sock_, SD_SEND); + closesocket(sock_); WSACleanup(); if (res == SOCKET_ERROR) - return tensorflow::errors::Internal("Shutdown failed with error: ", - WSAGetLastError()); + return errors::Internal("Shutdown failed with error: ", WSAGetLastError()); else - return tensorflow::Status::OK(); + return Status::OK(); } -bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; } +bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; } -int PlainClient::GetSocketDescriptor() { return sock; } +int PlainClient::GetSocketDescriptor() { return sock_; } -tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) { +Status PlainClient::ReadData(uint8_t *buf, int32_t 
length) { int recieved = 0; while (recieved < length) { - int res = recv(sock, buf, length - recieved, 0); + int res = recv(sock_, buf, length - recieved, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while reading from socket: ", res); + return errors::Internal("Error occured while reading from socket: ", res); - if (res == 0) - return tensorflow::errors::Internal("Server closed connection"); + if (res == 0) return errors::Internal("Server closed connection"); recieved += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) { +Status PlainClient::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock, buf, length - sent, 0); + int res = send(sock_, buf, length - sent, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while writing into socket: ", res); + return errors::Internal("Error occured while writing into socket: ", res); sent += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc index a1101b91f3..a2bc6b9609 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include #include -namespace ignite { +namespace tensorflow { static int PasswordCb(char *buf, int size, int rwflag, void *password) { strncpy(buf, (char *)(password), size); @@ -31,119 +31,112 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) { SslWrapper::SslWrapper(std::shared_ptr client, std::string certfile, std::string keyfile, std::string cert_password) - : client(client), - certfile(certfile), - keyfile(keyfile), - cert_password(cert_password), - ctx(NULL) {} + : client_(client), + certfile_(certfile), + keyfile_(keyfile), + cert_password_(cert_password), + ctx_(NULL) {} SslWrapper::~SslWrapper() { if (IsConnected()) { - tensorflow::Status status = Disconnect(); + Status status = Disconnect(); if (!status.ok()) LOG(WARNING) << status.ToString(); } - if (ctx != NULL) { - SSL_CTX_free(ctx); - ctx = NULL; + if (ctx_ != NULL) { + SSL_CTX_free(ctx_); + ctx_ = NULL; } } -tensorflow::Status SslWrapper::InitSslContext() { +Status SslWrapper::InitSslContext() { OpenSSL_add_all_algorithms(); SSL_load_error_strings(); - ctx = SSL_CTX_new(SSLv23_method()); - if (ctx == NULL) - return tensorflow::errors::Internal("Couldn't create SSL context"); + ctx_ = SSL_CTX_new(SSLv23_method()); + if (ctx_ == NULL) return errors::Internal("Couldn't create SSL context"); - SSL_CTX_set_default_passwd_cb(ctx, PasswordCb); - SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str()); + SSL_CTX_set_default_passwd_cb(ctx_, PasswordCb); + SSL_CTX_set_default_passwd_cb_userdata(ctx_, (void *)cert_password_.c_str()); - if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1) - return tensorflow::errors::Internal( - "Couldn't load cetificate chain (file '", certfile, "')"); + if (SSL_CTX_use_certificate_chain_file(ctx_, certfile_.c_str()) != 1) + return errors::Internal("Couldn't load cetificate chain (file '", certfile_, + "')"); - std::string private_key_file = keyfile.empty() ? 
certfile : keyfile; - if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(), + std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_; + if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(), SSL_FILETYPE_PEM) != 1) - return tensorflow::errors::Internal("Couldn't load private key (file '", - private_key_file, "')"); + return errors::Internal("Couldn't load private key (file '", + private_key_file, "')"); - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status SslWrapper::Connect() { - tensorflow::Status status; - - if (ctx == NULL) { - status = InitSslContext(); - if (!status.ok()) return status; +Status SslWrapper::Connect() { + if (ctx_ == NULL) { + TF_RETURN_IF_ERROR(InitSslContext()); } - ssl = SSL_new(ctx); - if (ssl == NULL) - return tensorflow::errors::Internal("Failed to establish SSL connection"); + ssl_ = SSL_new(ctx_); + if (ssl_ == NULL) + return errors::Internal("Failed to establish SSL connection"); - status = client->Connect(); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(client_->Connect()); - SSL_set_fd(ssl, client->GetSocketDescriptor()); - if (SSL_connect(ssl) != 1) - return tensorflow::errors::Internal("Failed to establish SSL connection"); + SSL_set_fd(ssl_, client_->GetSocketDescriptor()); + if (SSL_connect(ssl_) != 1) + return errors::Internal("Failed to establish SSL connection"); LOG(INFO) << "SSL connection established"; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status SslWrapper::Disconnect() { - SSL_free(ssl); +Status SslWrapper::Disconnect() { + SSL_free(ssl_); LOG(INFO) << "SSL connection closed"; - return client->Disconnect(); + return client_->Disconnect(); } -bool SslWrapper::IsConnected() { return client->IsConnected(); } +bool SslWrapper::IsConnected() { return client_->IsConnected(); } -int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); } +int SslWrapper::GetSocketDescriptor() { return 
client_->GetSocketDescriptor(); } -tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { +Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = SSL_read(ssl, buf, length - recieved); + int res = SSL_read(ssl_, buf, length - recieved); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while reading from SSL socket: ", res); + return errors::Internal("Error occured while reading from SSL socket: ", + res); - if (res == 0) - return tensorflow::errors::Internal("Server closed SSL connection"); + if (res == 0) return errors::Internal("Server closed SSL connection"); recieved += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { +Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = SSL_write(ssl, buf, length - sent); + int res = SSL_write(ssl_, buf, length - sent); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while writing into socket: ", res); + return errors::Internal("Error occured while writing into socket: ", res); sent += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h index e0c2a242dc..bbba6cc181 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -13,15 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H #include "ignite_client.h" -#endif #include #include -namespace ignite { +namespace tensorflow { class SslWrapper : public Client { public: @@ -29,21 +26,22 @@ class SslWrapper : public Client { std::string keyfile, std::string cert_password); ~SslWrapper(); - virtual tensorflow::Status Connect(); - virtual tensorflow::Status Disconnect(); + virtual Status Connect(); + virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); - virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, int32_t length); + virtual Status WriteData(uint8_t* buf, int32_t length); private: - std::shared_ptr client; - std::string certfile; - std::string keyfile; - std::string cert_password; - SSL_CTX* ctx; - SSL* ssl; - tensorflow::Status InitSslContext(); + std::shared_ptr client_; + std::string certfile_; + std::string keyfile_; + std::string cert_password_; + SSL_CTX* ctx_; + SSL* ssl_; + + Status InitSslContext(); }; -} // namespace ignite +} // namespace tensorflow -- GitLab From 1408a1563e73e69f68c1eb6f34a0976c7c950ad9 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 28 Aug 2018 11:32:57 +0300 Subject: [PATCH 0105/1357] Update README.md. 
--- tensorflow/contrib/ignite/README.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md index f2596fc572..8fec4066c4 100644 --- a/tensorflow/contrib/ignite/README.md +++ b/tensorflow/contrib/ignite/README.md @@ -13,19 +13,20 @@ ## Overview [Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for -transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. +transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows you to use Apache Ignite as a data source for neural network training, inference and all other computations supported by TensorFlow. ## Features -Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below. +Ignite Dataset provides features that you can use in a wide range of cases. The most important and interesting features are described below. 
### Distributed In-Memory Datasource -[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. +[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that provides fast data access. It allows you to avoid limitations of hard drive and store and operate with as much data as you need in a distributed cluster. You can utilize +these benefits of Apache Ignite by using Ignite Dataset. Moreover, Ignite Dataset can be used for the following use-cases: - If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations. - If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations. - If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow. -It's important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. 
Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, in the same time, an ability to use this data for neural network training and inference. +Note that Apache Ignite is not just a step of ETL pipeline between a database or a data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. By choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, at the same time, an ability to use this data for neural network training and inference. ```bash $ apache-ignite-fabric/bin/ignite.sh @@ -55,7 +56,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL ``` ### Structured Objects -[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects. +[Apache Ignite](https://ignite.apache.org/) allows to store any type of objects. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects. ```python >>> import tensorflow as tf @@ -81,7 +82,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL } } ``` - Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. + Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. 
```python >>> import tensorflow as tf @@ -99,15 +100,15 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL ### Distributed Training -TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. +TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is the ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. -Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck. +Using this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottlenecks. -Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we creates cache with 10 partitions then every machine will maintain approximately one data partition. 
+Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL), we can specify the number of partitions the data will be partitioned on. For example, if an Apache Ignite cluster consists of 10 machines and we create cache with 10 partitions, then every machine will maintain approximately one data partition. -Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset. +Ignite Dataset allows using these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that can be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting corresponding environment variables for the worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach, we can assign a specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with a single dataset. ```python >>> import tensorflow as tf @@ -135,7 +136,7 @@ High-level TensorFlow API for [distributed training](https://www.tensorflow.org/ ### SSL Connection -Your data should not be accessible without any control. 
Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information please see [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. +Apache Ignite allows protecting data transfer channels with [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentication. Ignite Dataset supports SSL connections both with and without authentication. For more information, please refer to the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. ```python >>> import tensorflow as tf @@ -147,11 +148,11 @@ Your data should not be accessible without any control. Apache Ignite allows to ### Windows Support -Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. +Ignite Dataset is fully compatible with Windows. You can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. ## Try it out -The simplest way to try Ignite Dataset out is to run [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine: +The simplest way to try Ignite Dataset is to run a [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interact with it using Ignite Dataset. Such a container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/).
You need to start this container on your machine: ``` docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist @@ -163,4 +164,4 @@ After that you will be able to work with it following way: ## Limitations -Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. +Presently, Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of an object structure. -- GitLab From 92019765d7b7db99d0235268d00f349b7a53d1a9 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Wed, 5 Sep 2018 14:47:20 +0000 Subject: [PATCH 0106/1357] Fix pylint checks, fix VS compilation issue. 
--- .../contrib/ignite/kernels/ignite_plain_client_windows.cc | 4 ++-- .../contrib/ignite/python/ops/ignite_dataset_ops.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 7ba037f2d2..e1e2ee3b20 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = recv(sock_, buf, length - recieved, 0); + int res = recv(sock_, (char*)buf, length - recieved, 0); if (res < 0) return errors::Internal("Error occured while reading from socket: ", res); @@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock_, buf, length - sent, 0); + int res = send(sock_, (char*)buf, length - sent, 0); if (res < 0) return errors::Internal("Error occured while writing into socket: ", res); diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py index 6fa073957a..60003ca3b7 100644 --- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -66,13 +66,13 @@ class Readable(): def __read(self, data_type, length): """Reads, unpacks and returns specified type (little-endian).""" - buffer = self.read_data(length) - return struct.unpack("<" + data_type, buffer)[0] + data_buffer = self.read_data(length) + return struct.unpack("<" + data_type, data_buffer)[0] class DataBuffer(Readable): """DataBuffer class that exposes methods to read data from a byte buffer.""" - def __init__(self, buffer): + def __init__(self, data_buffer): """Constructs a new instance of DataBuffer based on the specified 
byte buffer. @@ -80,7 +80,7 @@ class DataBuffer(Readable): buffer: Buffer to be read. """ Readable.__init__(self) - self.buffer = buffer + self.buffer = data_buffer self.ptr = 0 def read_data(self, length): -- GitLab From 0b6654bc223f4f3807209043dc34ccb07b55474e Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 11 Sep 2018 09:50:47 +0000 Subject: [PATCH 0107/1357] Fix code style. --- .../ignite/kernels/ignite_dataset_ops.cc | 2 +- .../kernels/ignite_plain_client_windows.cc | 4 +-- tensorflow/contrib/ignite/ops/dataset_ops.cc | 34 +++++++++---------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index 89eecf9c14..d03404a460 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_dataset.h" #include +#include "ignite_dataset.h" #include "tensorflow/core/framework/dataset.h" namespace tensorflow { diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index e1e2ee3b20..8182fde6d9 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = recv(sock_, (char*)buf, length - recieved, 0); + int res = recv(sock_, (char *)buf, length - recieved, 0); if (res < 0) return errors::Internal("Error occured while reading from socket: ", res); @@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock_, (char*)buf, length - sent, 0); + int res = send(sock_, (char *)buf, length - sent, 0); if (res < 0) return errors::Internal("Error occured while writing into socket: ", res); diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc index 17494d1cfd..fb16b290b1 100644 --- a/tensorflow/contrib/ignite/ops/dataset_ops.cc +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -20,23 +20,23 @@ limitations under the License. 
namespace tensorflow { REGISTER_OP("IgniteDataset") - .Input("cache_name: string") - .Input("host: string") - .Input("port: int32") - .Input("local: bool") - .Input("part: int32") - .Input("page_size: int32") - .Input("username: string") - .Input("password: string") - .Input("certfile: string") - .Input("keyfile: string") - .Input("cert_password: string") - .Input("schema: int32") - .Input("permutation: int32") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( + .Input("cache_name: string") + .Input("host: string") + .Input("port: int32") + .Input("local: bool") + .Input("part: int32") + .Input("page_size: int32") + .Input("username: string") + .Input("password: string") + .Input("certfile: string") + .Input("keyfile: string") + .Input("cert_password: string") + .Input("schema: int32") + .Input("permutation: int32") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( Apache Ignite is a memory-centric distributed database, caching, and processing platform for transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an -- GitLab From 9ac00398d1c0e5f3f2e76dec15fa6646f5027633 Mon Sep 17 00:00:00 2001 From: Smokrow Date: Tue, 11 Sep 2018 17:26:16 +0200 Subject: [PATCH 0108/1357] Update of flat_map Rework based on Marks review --- tensorflow/python/data/ops/dataset_ops.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 14a1e3d803..2fc41a3b98 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1009,16 +1009,18 @@ class Dataset(object): def flat_map(self, map_func): """Maps `map_func` across this dataset and flattens the result. - Will produce similar results to `tf.data.Dataset.interleave(cycle_length=1)`. 
+ `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since + `flat_map` produces a similar outputs as `tf.data.Dataset.interleave(cycle_length=1)` + Use `flat_map` if you want to make sure, that the order of your dataset stays the same. - For example: + For example, to implement unbatch: ```python # NOTE: The following examples use `{ ... }` to represent the # contents of a dataset. '[...]' represents a tensor. a = {[1,2,3,4,5], [6,7,8,9], [10]} - a.flat_map(lambda x: Dataset.from_tensors(x)) == + a.flat_map(lambda x: Dataset.from_tensor_slices(x)) == {[1,2,3,4,5,6,7,8,9,10]} ``` Args: -- GitLab From 82d082a2d775843a858919f4de84b3f6dfe0d62d Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 12 Sep 2018 02:11:42 +0000 Subject: [PATCH 0109/1357] Add unit test for TensorArray with int64 in GPU Signed-off-by: Yong Tang --- .../python/kernel_tests/tensor_array_ops_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index 6de6fbe767..b47e750f4b 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -1504,6 +1504,19 @@ class TensorArrayTest(test.TestCase): vdx, vdy = sess.run([dx, dy]) self.assertAllClose(vdx, vdy) + def testTensorArrayInt64GPU(self): + if not test.is_gpu_available(): + return + with self.test_session(use_gpu=True, force_gpu=True) as sess: + value = array_ops.placeholder(dtypes.int64) + ta = tensor_array_ops.TensorArray(dtype=dtypes.int64, size=2) + ta = ta.scatter([0, 1], value) + r0 = ta.read(0) + r1 = ta.read(1) + v0, v1 = sess.run([r0, r1], feed_dict={value: [-3, 100]}) + self.assertAllEqual(v0, -3) + self.assertAllEqual(v1, 100) + if __name__ == "__main__": test.main() -- GitLab From 5e9a9547f907599f6954fc5e28b7a78acf3b54eb Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 12 Sep 2018 11:02:12 +0800 Subject: [PATCH 
0110/1357] Revert "Add XLA support for LeakyReluOp." This reverts commit d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74. Since bfloat16 was not supported by LeakyRelu, but it should be supported in XLA Ops. --- tensorflow/compiler/tests/binary_ops_test.py | 8 ---- tensorflow/compiler/tests/unary_ops_test.py | 5 --- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 ------------------- 3 files changed, 55 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index c478ff4eea..17280e445b 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -178,14 +178,6 @@ class BinaryOpsTest(xla_test.XLATestCase): [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) - self._testBinary( - gen_nn_ops.leaky_relu_grad, - np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), - np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - dtype=dtype), - expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], - dtype=dtype)) - self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index dd29ef34ce..5b0e57f83f 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -361,11 +361,6 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[-0.05, 6.05, 5]], dtype=dtype), expected=np.array([[0, 6, 5]], dtype=dtype)) - self._assertOpOutputMatchesExpected( - nn_ops.leaky_relu, - np.array([[-1.0, 1.0]], dtype=dtype), - expected=np.array([[-0.2, 1.0]], dtype=dtype)) - self._assertOpOutputMatchesExpected( nn_ops.softmax, np.array([1, 2, 3, 4], dtype=dtype), diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index 
8d65e0339c..d35777ccb1 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,23 +50,6 @@ class Relu6Op : public XlaOpKernel { } }; -class LeakyReluOp : public XlaOpKernel { - public: - explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); - } - // Compute the max of the input x and alpha*x. - void Compile(XlaOpKernelContext* ctx) override { - xla::XlaBuilder* builder = ctx->builder(); - auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), - static_cast(alpha_)); - ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); - } - - private: - float alpha_; -}; - class ReluGradOp : public XlaOpKernel { public: explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} @@ -101,35 +84,10 @@ class Relu6GradOp : public XlaOpKernel { } }; -class LeakyReluGradOp : public XlaOpKernel { - public: - explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); - } - // Return the lhs (incoming gradient) if the rhs (input feature) > 0, - // otherwise return the alpha * lhs. 
- void Compile(XlaOpKernelContext* ctx) override { - xla::XlaBuilder* b = ctx->builder(); - const TensorShape shape = ctx->InputShape(0); - const auto zero = - xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); - const auto pred = xla::Gt(ctx->Input(1), zero); - auto alpha = - XlaHelpers::FloatLiteral(b, input_type(0), static_cast(alpha_)); - ctx->SetOutput( - 0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); - } - - private: - float alpha_; -}; - REGISTER_XLA_OP(Name("Relu"), ReluOp); REGISTER_XLA_OP(Name("Relu6"), Relu6Op); -REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp); REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp); REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp); -REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp); } // namespace } // namespace tensorflow -- GitLab From 8c51bbcd1b8d7d32a634df6eadde084e87ede1bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 12 Sep 2018 14:01:09 +0800 Subject: [PATCH 0111/1357] BLD: update golden file --- .../v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt | 5 +++++ .../v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt | 5 +++++ .../v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt | 5 +++++ .../v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt index 7027e78df4..150dd21dbc 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-classifier.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.estimator.BoostedTreesClassifier" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -31,6 +32,10 @@ tf_class { name: "evaluate" argspec: "args=[\'self\', 
\'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "experimental_feature_importances" + argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], " + } member_method { name: "export_saved_model" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt index d8167ea7cb..6e7b5a3d47 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-boosted-trees-regressor.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.estimator.BoostedTreesRegressor" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -31,6 +32,10 @@ tf_class { name: "evaluate" argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "experimental_feature_importances" + argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], " + } member_method { name: "export_saved_model" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt index 7027e78df4..150dd21dbc 100644 --- 
a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-classifier.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.estimator.BoostedTreesClassifier" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -31,6 +32,10 @@ tf_class { name: "evaluate" argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "experimental_feature_importances" + argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], " + } member_method { name: "export_saved_model" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt index d8167ea7cb..6e7b5a3d47 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-boosted-trees-regressor.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.estimator.BoostedTreesRegressor" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { @@ -31,6 +32,10 @@ tf_class { name: "evaluate" argspec: "args=[\'self\', \'input_fn\', \'steps\', \'hooks\', \'checkpoint_path\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "experimental_feature_importances" + argspec: "args=[\'self\', \'normalize\'], varargs=None, keywords=None, defaults=[\'False\'], " + } member_method { name: "export_saved_model" argspec: "args=[\'self\', \'export_dir_base\', 
\'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " -- GitLab From 2dd5fb6cfb16ccc612b6e278d6282ef90581c0bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 11 Sep 2018 21:35:22 +0800 Subject: [PATCH 0112/1357] CLN: fix merge error --- tensorflow/contrib/losses/python/losses/loss_ops.py | 7 ++++--- tensorflow/contrib/metrics/python/ops/metric_ops.py | 4 ++-- tensorflow/contrib/rate/rate.py | 2 +- tensorflow/python/kernel_tests/losses_test.py | 1 - tensorflow/python/ops/losses/losses_impl.py | 3 ++- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 8a0932c376..66322140cb 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -585,9 +585,10 @@ def mean_pairwise_squared_error(predictions, math_ops.square(diffs), reduction_indices=reduction_indices) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, - math_ops.maximum(num_present_per_batch), - name="value") + term1 = 2.0 * math_ops.div_no_nan( + sum_squares_diff_per_batch, + math_ops.maximum(num_present_per_batch, 0), + name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff), diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 1ddd7e521b..d7c73c8f99 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -3904,8 +3904,8 @@ def cohen_kappa(labels, total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( math_ops.div_no_nan( - pe_row * pe_col, - 
math_ops.maximum(total, 0), + math_ops.to_double(pe_row * pe_col), + math_ops.to_double(total), name=None)) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 489d5cce78..d948066b36 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -142,5 +142,5 @@ class Rate(object): state_ops.assign(self.prev_denominator, denominator) return math_ops.div_no_nan(self.numer, - math_op.maximum(self.denom, 0), + math_ops.maximum(self.denom, 0), name="safe_rate") diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index c45b5035de..273a916fe5 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index a980a43f62..2035aaf9fe 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -586,7 +586,8 @@ def mean_pairwise_squared_error( term2 = 2.0 * math_ops.div_no_nan( math_ops.square(sum_diff), math_ops.maximum( - math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), + math_ops.multiply(num_present_per_batch, + num_present_per_batch - 1), 0), name="value") -- GitLab From e3c334e57fba9afc0b0a3aa5f7787ee35e17ddf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 12 Sep 2018 14:59:44 +0800 Subject: [PATCH 0113/1357] CLN: remove unnecessary math_ops.maximum --- tensorflow/contrib/losses/python/losses/loss_ops.py | 11 ++++------- 
tensorflow/contrib/metrics/python/ops/metric_ops.py | 8 ++++---- tensorflow/python/keras/engine/training_utils.py | 3 +-- tensorflow/python/keras/metrics.py | 2 +- tensorflow/python/ops/losses/losses_impl.py | 4 +--- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 66322140cb..7e5ab05987 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -78,9 +78,7 @@ def _safe_mean(losses, num_present): then zero is returned. """ total_loss = math_ops.reduce_sum(losses) - return math_ops.div_no_nan(total_loss, - math_ops.maximum(num_present, 0), - name="value") + return math_ops.div_no_nan(total_loss, num_present, name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -585,10 +583,9 @@ def mean_pairwise_squared_error(predictions, math_ops.square(diffs), reduction_indices=reduction_indices) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * math_ops.div_no_nan( - sum_squares_diff_per_batch, - math_ops.maximum(num_present_per_batch, 0), - name="value") + term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, + num_present_per_batch, + name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff), diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index d7c73c8f99..91939b5bf2 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -3222,11 +3222,11 @@ def streaming_covariance(predictions, # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = math_ops.div_no_nan( math_ops.reduce_sum(weighted_predictions), - math_ops.maximum(batch_count, 0), + batch_count, 
name='batch_mean_prediction') delta_mean_prediction = math_ops.div_no_nan( (batch_mean_prediction - mean_prediction) * batch_count, - math_ops.maximum(update_count, 0), + update_count, name='delta_mean_prediction') update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) @@ -3236,11 +3236,11 @@ def streaming_covariance(predictions, # batch_mean_label is E[y_B] in the update equation batch_mean_label = math_ops.div_no_nan( math_ops.reduce_sum(weighted_labels), - math_ops.maximum(batch_count, 0), + batch_count, name='batch_mean_label') delta_mean_label = math_ops.div_no_nan( (batch_mean_label - mean_label) * batch_count, - math_ops.maximum(update_count, 0), + update_count, name='delta_mean_label') update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 9082b9f0fa..c23168ccef 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -613,8 +613,7 @@ def weighted_masked_objective(fn): score_array = math_ops.multiply(score_array, weights) score_array = math_ops.reduce_sum(score_array) weights = math_ops.reduce_sum(weights) - score_array = math_ops.div_no_nan(score_array, - math_ops.maximum(weights, 0)) + score_array = math_ops.div_no_nan(score_array, weights) return K.mean(score_array) return weighted diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 4050eb95a4..f85b6554bd 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -488,7 +488,7 @@ class Mean(Metric): state_ops.assign_add(self.count, num_values) def result(self): - return math_ops.div_no_nan(self.total, math_ops.maximum(self.count, 0)) + return math_ops.div_no_nan(self.total, self.count) class MeanMetricWrapper(Mean): diff --git 
a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 2035aaf9fe..fe4950a475 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -86,9 +86,7 @@ def _safe_mean(losses, num_present): then zero is returned. """ total_loss = math_ops.reduce_sum(losses) - return math_ops.div_no_nan(total_loss, - math_ops.maximum(num_present, 0), - name="value") + return math_ops.div_no_nan(total_loss, num_present, name="value") def _num_present(losses, weights, per_batch=False): -- GitLab From fd41d2c959372d7a068cb4474391362ef6a92fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 12 Sep 2018 15:04:28 +0800 Subject: [PATCH 0114/1357] CLN: fix code style --- tensorflow/python/estimator/canned/boosted_trees_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index a176b4941f..c1309fb809 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -1083,7 +1083,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'continuous', dtype=dtypes.float32) with self.assertRaisesRegexp(ValueError, - 'only bucketized_column and indicator_column'): + 'only bucketized_column and indicator_column'): _ = boosted_trees.BoostedTreesRegressor( feature_columns=[numeric_col], n_batches_per_layer=1, -- GitLab From 5f69ba51752561f6294705b5d66705bdf322831d Mon Sep 17 00:00:00 2001 From: Johannes Bannhofer <4116408+joba01@users.noreply.github.com> Date: Wed, 12 Sep 2018 09:23:02 +0200 Subject: [PATCH 0115/1357] Fixed wrong variable name in example The Keras model used a wrong variable name in the MirroredStrategy example --- tensorflow/contrib/distribute/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorflow/contrib/distribute/README.md b/tensorflow/contrib/distribute/README.md index 30e1992c01..91a27f97b7 100644 --- a/tensorflow/contrib/distribute/README.md +++ b/tensorflow/contrib/distribute/README.md @@ -76,7 +76,7 @@ We then compile the Keras model and pass the `MirroredStrategy` object in the ```python model.compile(loss='mean_squared_error', optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.2), - distribute=strategy) + distribute=distribution) ``` To train the model we call Keras `fit` API using the input dataset that we -- GitLab From 9a13fc35951cef95d4dc71dabce4c270eb73d62a Mon Sep 17 00:00:00 2001 From: hellcom Date: Wed, 12 Sep 2018 10:58:24 +0300 Subject: [PATCH 0116/1357] Fix missprint - unknown variable name. Signed-off-by: hellcom --- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 361bd4764d..52a513779e 100644 --- a/configure.py +++ b/configure.py @@ -852,7 +852,7 @@ def set_tf_cuda_version(environ_cp): # Reset and retry print('Invalid path to CUDA %s toolkit. %s cannot be found' % - (tf_cuda_version, cuda_toolkit_path_full)) + (tf_cuda_version, cuda_toolkit_paths_full)) environ_cp['TF_CUDA_VERSION'] = '' environ_cp['CUDA_TOOLKIT_PATH'] = '' -- GitLab From 9ec9c8b24cca5f1e746fef8cd351b3cae6d5a740 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Wed, 12 Sep 2018 20:42:01 +0300 Subject: [PATCH 0117/1357] Fixes after second review. 
--- tensorflow/contrib/ignite/BUILD | 1 + tensorflow/contrib/ignite/__init__.py | 22 +- .../kernels/ignite_binary_object_parser.cc | 404 ++++++++++-------- .../kernels/ignite_binary_object_parser.h | 36 +- .../contrib/ignite/kernels/ignite_client.h | 55 ++- .../contrib/ignite/kernels/ignite_dataset.cc | 99 ++--- .../contrib/ignite/kernels/ignite_dataset.h | 37 +- .../ignite/kernels/ignite_dataset_iterator.cc | 383 ++++++++--------- .../ignite/kernels/ignite_dataset_iterator.h | 74 ++-- .../ignite/kernels/ignite_dataset_ops.cc | 123 ++++-- .../ignite/kernels/ignite_plain_client.h | 15 +- .../kernels/ignite_plain_client_unix.cc | 14 +- .../kernels/ignite_plain_client_windows.cc | 17 +- .../ignite/kernels/ignite_ssl_wrapper.cc | 34 +- .../ignite/kernels/ignite_ssl_wrapper.h | 26 +- tensorflow/contrib/ignite/ops/dataset_ops.cc | 2 + .../ignite/python/ops/ignite_dataset_ops.py | 176 ++++---- 17 files changed, 848 insertions(+), 670 deletions(-) diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index b7d40a99f7..2f598b4aed 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -40,6 +40,7 @@ cc_library( srcs = [ "kernels/ignite_dataset_ops.cc", "kernels/ignite_client.h", + "kernels/ignite_byte_swapper.h", "kernels/ignite_plain_client.h", "kernels/ignite_ssl_wrapper.h", "kernels/ignite_ssl_wrapper.cc", diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py index b78829d0f4..f42947696f 100644 --- a/tensorflow/contrib/ignite/__init__.py +++ b/tensorflow/contrib/ignite/__init__.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Apache Ignite is a memory-centric distributed database, caching, and - processing platform for transactional, analytical, and streaming workloads, - delivering in-memory speeds at petabyte scale. 
This contrib package - contains an integration between Apache Ignite and TensorFlow. The - integration is based on tf.data from TensorFlow side and Binary Client - Protocol from Apache Ignite side. It allows to use Apache Ignite as a - datasource for neural network training, inference and all other - computations supported by TensorFlow. Ignite Dataset is based on Apache - Ignite Binary Client Protocol: - https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. +"""IgniteDataset that allows to get data from Apache Ignite. + +Apache Ignite is a memory-centric distributed database, caching, and +processing platform for transactional, analytical, and streaming workloads, +delivering in-memory speeds at petabyte scale. This contrib package +contains an integration between Apache Ignite and TensorFlow. The +integration is based on tf.data from TensorFlow side and Binary Client +Protocol from Apache Ignite side. It allows to use Apache Ignite as a +datasource for neural network training, inference and all other +computations supported by TensorFlow. Ignite Dataset is based on Apache +Ignite Binary Client Protocol: +https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. @@IgniteDataset """ diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc index 9bf4480d2d..2c8a7d44b0 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -13,242 +13,171 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" namespace tensorflow { +BinaryObjectParser::BinaryObjectParser() : byte_swapper_(ByteSwapper(false)) {} + Status BinaryObjectParser::Parse(uint8_t** ptr, std::vector* out_tensors, - std::vector* types) { - uint8_t object_type_id = **ptr; - *ptr += 1; + std::vector* types) const { + uint8_t object_type_id = ParseByte(ptr); + + // Skip non-leaf nodes. + if (object_type_id != WRAPPED_OBJ && object_type_id != COMPLEX_OBJ) + types->push_back(object_type_id); switch (object_type_id) { case BYTE: { - Tensor tensor(cpu_allocator(), DT_UINT8, {}); - tensor.scalar()() = *((uint8_t*)*ptr); - *ptr += 1; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_UINT8, TensorShape({})); + out_tensors->back().scalar()() = ParseByte(ptr); break; } case SHORT: { - Tensor tensor(cpu_allocator(), DT_INT16, {}); - tensor.scalar()() = *((int16_t*)*ptr); - *ptr += 2; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_INT16, TensorShape({})); + out_tensors->back().scalar()() = ParseShort(ptr); + break; + } + case USHORT: { + out_tensors->emplace_back(cpu_allocator(), DT_UINT16, TensorShape({})); + out_tensors->back().scalar()() = ParseUnsignedShort(ptr); break; } case INT: { - Tensor tensor(cpu_allocator(), DT_INT32, {}); - tensor.scalar()() = *((int32_t*)*ptr); - *ptr += 4; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_INT32, TensorShape({})); + out_tensors->back().scalar()() = ParseInt(ptr); break; } case LONG: { - Tensor tensor(cpu_allocator(), DT_INT64, {}); - tensor.scalar()() = *((int64_t*)*ptr); - *ptr += 8; - out_tensors->push_back(std::move(tensor)); + 
out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({})); + out_tensors->back().scalar()() = ParseLong(ptr); break; } case FLOAT: { - Tensor tensor(cpu_allocator(), DT_FLOAT, {}); - tensor.scalar()() = *((float*)*ptr); - *ptr += 4; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, TensorShape({})); + out_tensors->back().scalar()() = ParseFloat(ptr); break; } case DOUBLE: { - Tensor tensor(cpu_allocator(), DT_DOUBLE, {}); - tensor.scalar()() = *((double*)*ptr); - *ptr += 8; - out_tensors->push_back(std::move(tensor)); - break; - } - case UCHAR: { - Tensor tensor(cpu_allocator(), DT_UINT16, {}); - tensor.scalar()() = *((uint16_t*)*ptr); - *ptr += 2; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, TensorShape({})); + out_tensors->back().scalar()() = ParseDouble(ptr); break; } case BOOL: { - Tensor tensor(cpu_allocator(), DT_BOOL, {}); - tensor.scalar()() = *((bool*)*ptr); - *ptr += 1; - out_tensors->push_back(std::move(tensor)); - + out_tensors->emplace_back(cpu_allocator(), DT_BOOL, TensorShape({})); + out_tensors->back().scalar()() = ParseBool(ptr); break; } case STRING: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_STRING, {}); - tensor.scalar()() = std::string((char*)*ptr, length); - *ptr += length; - out_tensors->push_back(std::move(tensor)); - + out_tensors->emplace_back(cpu_allocator(), DT_STRING, TensorShape({})); + out_tensors->back().scalar()() = ParseString(ptr); break; } case DATE: { - Tensor tensor(cpu_allocator(), DT_INT64, {}); - tensor.scalar()() = *((int64_t*)*ptr); - *ptr += 8; - out_tensors->push_back(std::move(tensor)); - + out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({})); + out_tensors->back().scalar()() = ParseLong(ptr); break; } case BYTE_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length})); - - 
uint8_t* arr = (uint8_t*)*ptr; - *ptr += length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + uint8_t* arr = ParseByteArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_UINT8, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case SHORT_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length})); - - int16_t* arr = (int16_t*)*ptr; - *ptr += length * 2; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + int16_t* arr = ParseShortArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT16, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case USHORT_ARR: { + int32_t length = ParseInt(ptr); + uint16_t* arr = ParseUnsignedShortArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_UINT16, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case INT_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length})); - - int32_t* arr = (int32_t*)*ptr; - *ptr += length * 4; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + int32_t* arr = ParseIntArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT32, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case LONG_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); - - int64_t* arr = (int64_t*)*ptr; - *ptr += length * 8; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = 
ParseInt(ptr); + int64_t* arr = ParseLongArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT64, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case FLOAT_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length})); - - float* arr = (float*)*ptr; - *ptr += 4 * length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + float* arr = ParseFloatArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case DOUBLE_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length})); - - double* arr = (double*)*ptr; - *ptr += 8 * length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); - break; - } - case UCHAR_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length})); - - uint16_t* arr = (uint16_t*)*ptr; - *ptr += length * 2; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + double* arr = ParseDoubleArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case BOOL_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length})); - - bool* arr = (bool*)*ptr; - *ptr += length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + bool* arr = ParseBoolArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_BOOL, + TensorShape({length})); + 
std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case STRING_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length})); - - for (int32_t i = 0; i < length; i++) { - int32_t str_length = *((int32_t*)*ptr); - *ptr += 4; - const int8_t* str = (const int8_t*)*ptr; - *ptr += str_length; - tensor.vec()(i) = std::string((char*)str, str_length); - } - - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + out_tensors->emplace_back(cpu_allocator(), DT_STRING, + TensorShape({length})); + for (int32_t i = 0; i < length; i++) + out_tensors->back().vec()(i) = ParseString(ptr); break; } case DATE_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); - int64_t* arr = (int64_t*)*ptr; - *ptr += length * 8; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + int64_t* arr = ParseLongArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT64, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case WRAPPED_OBJ: { - int32_t byte_arr_size = *((int32_t*)*ptr); - *ptr += 4; - + int32_t byte_arr_size = ParseInt(ptr); TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); - - int32_t offset = *((int32_t*)*ptr); - *ptr += 4; + int32_t offset = ParseInt(ptr); break; } case COMPLEX_OBJ: { - uint8_t version = **ptr; - *ptr += 1; - int16_t flags = *((int16_t*)*ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 - *ptr += 2; - int32_t type_id = *((int32_t*)*ptr); - *ptr += 4; - int32_t hash_code = *((int32_t*)*ptr); - *ptr += 4; - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - int32_t schema_id = *((int32_t*)*ptr); - *ptr += 4; - int32_t schema_offset = *((int32_t*)*ptr); - *ptr += 4; - + uint8_t version = ParseByte(ptr); + int16_t flags = ParseShort(ptr); + int32_t type_id = 
ParseInt(ptr); + int32_t hash_code = ParseInt(ptr); + int32_t length = ParseInt(ptr); + int32_t schema_id = ParseInt(ptr); + int32_t schema_offset = ParseInt(ptr); + + // 24 is size of header just read. uint8_t* end = *ptr + schema_offset - 24; int32_t i = 0; while (*ptr < end) { @@ -261,12 +190,145 @@ Status BinaryObjectParser::Parse(uint8_t** ptr, break; } default: { - return errors::Internal("Unknowd binary type (type id ", - (int)object_type_id, ")"); + return errors::Unknown("Unknowd binary type (type id ", + (int)object_type_id, ")"); } } return Status::OK(); } +uint8_t BinaryObjectParser::ParseByte(uint8_t** ptr) const { + uint8_t res = **ptr; + *ptr += 1; + + return res; +} + +int16_t BinaryObjectParser::ParseShort(uint8_t** ptr) const { + int16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt16(res); + *ptr += 2; + + return *res; +} + +uint16_t BinaryObjectParser::ParseUnsignedShort(uint8_t** ptr) const { + uint16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredUnsignedInt16(res); + *ptr += 2; + + return *res; +} + +int32_t BinaryObjectParser::ParseInt(uint8_t** ptr) const { + int32_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt32(res); + *ptr += 4; + + return *res; +} + +int64_t BinaryObjectParser::ParseLong(uint8_t** ptr) const { + int64_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt64(res); + *ptr += 8; + + return *res; +} + +float BinaryObjectParser::ParseFloat(uint8_t** ptr) const { + float* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredFloat(res); + *ptr += 4; + + return *res; +} + +double BinaryObjectParser::ParseDouble(uint8_t** ptr) const { + double* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredDouble(res); + *ptr += 8; + + return *res; +} + +bool BinaryObjectParser::ParseBool(uint8_t** ptr) const { + bool res = **reinterpret_cast(ptr); + *ptr += 1; + + return res; +} + +string BinaryObjectParser::ParseString(uint8_t** ptr) const { + int32_t 
length = ParseInt(ptr); + string res(*reinterpret_cast(ptr), length); + *ptr += length; + + return res; +} + +uint8_t* BinaryObjectParser::ParseByteArr(uint8_t** ptr, int length) const { + uint8_t* res = *reinterpret_cast(ptr); + *ptr += length; + + return res; +} + +int16_t* BinaryObjectParser::ParseShortArr(uint8_t** ptr, int length) const { + int16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt16Arr(res, length); + *ptr += length * 2; + + return res; +} + +uint16_t* BinaryObjectParser::ParseUnsignedShortArr(uint8_t** ptr, + int length) const { + uint16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredUnsignedInt16Arr(res, length); + *ptr += length * 2; + + return res; +} + +int32_t* BinaryObjectParser::ParseIntArr(uint8_t** ptr, int length) const { + int32_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt32Arr(res, length); + *ptr += length * 4; + + return res; +} + +int64_t* BinaryObjectParser::ParseLongArr(uint8_t** ptr, int length) const { + int64_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt64Arr(res, length); + *ptr += length * 8; + + return res; +} + +float* BinaryObjectParser::ParseFloatArr(uint8_t** ptr, int length) const { + float* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredFloatArr(res, length); + *ptr += length * 4; + + return res; +} + +double* BinaryObjectParser::ParseDoubleArr(uint8_t** ptr, int length) const { + double* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredDoubleArr(res, length); + *ptr += length * 8; + + return res; +} + +bool* BinaryObjectParser::ParseBoolArr(uint8_t** ptr, int length) const { + bool* res = *reinterpret_cast(ptr); + *ptr += length; + + return res; +} + } // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h index 9accbd796f..eb1f856643 100644 --- 
a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -13,16 +13,42 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ + #include -#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/status.h" namespace tensorflow { class BinaryObjectParser { public: + BinaryObjectParser(); Status Parse(uint8_t** ptr, std::vector* out_tensors, - std::vector* types); + std::vector* types) const; + + private: + uint8_t ParseByte(uint8_t** ptr) const; + int16_t ParseShort(uint8_t** ptr) const; + uint16_t ParseUnsignedShort(uint8_t** ptr) const; + int32_t ParseInt(uint8_t** ptr) const; + int64_t ParseLong(uint8_t** ptr) const; + float ParseFloat(uint8_t** ptr) const; + double ParseDouble(uint8_t** ptr) const; + bool ParseBool(uint8_t** ptr) const; + string ParseString(uint8_t** ptr) const; + uint8_t* ParseByteArr(uint8_t** ptr, int length) const; + int16_t* ParseShortArr(uint8_t** ptr, int length) const; + uint16_t* ParseUnsignedShortArr(uint8_t** ptr, int length) const; + int32_t* ParseIntArr(uint8_t** ptr, int length) const; + int64_t* ParseLongArr(uint8_t** ptr, int length) const; + float* ParseFloatArr(uint8_t** ptr, int length) const; + double* ParseDoubleArr(uint8_t** ptr, int length) const; + bool* ParseBoolArr(uint8_t** ptr, int length) const; + + const ByteSwapper byte_swapper_; }; enum ObjectType { @@ -32,7 +58,7 @@ enum ObjectType { LONG = 4, FLOAT = 5, DOUBLE = 6, - UCHAR = 7, + USHORT = 7, BOOL = 8, STRING = 9, DATE = 11, @@ -42,7 +68,7 @@ enum ObjectType { LONG_ARR = 15, 
FLOAT_ARR = 16, DOUBLE_ARR = 17, - UCHAR_ARR = 18, + USHORT_ARR = 18, BOOL_ARR = 19, STRING_ARR = 20, DATE_ARR = 22, @@ -51,3 +77,5 @@ enum ObjectType { }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h index 944b3fe184..508b6e4a60 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -16,40 +16,69 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ #define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ +#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" namespace tensorflow { class Client { public: + Client(bool big_endian) : byte_swapper_(ByteSwapper(big_endian)){}; virtual Status Connect() = 0; virtual Status Disconnect() = 0; virtual bool IsConnected() = 0; virtual int GetSocketDescriptor() = 0; - virtual Status ReadData(uint8_t* buf, int32_t length) = 0; - virtual Status WriteData(uint8_t* buf, int32_t length) = 0; + virtual Status ReadData(uint8_t *buf, const int32_t length) = 0; + virtual Status WriteData(const uint8_t *buf, const int32_t length) = 0; - inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); } + inline Status ReadByte(uint8_t *data) { return ReadData(data, 1); } - inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); } + inline Status ReadShort(int16_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 2)); + byte_swapper_.SwapIfRequiredInt16(data); - inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); } + return Status::OK(); + } + + inline Status ReadInt(int32_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 4)); + byte_swapper_.SwapIfRequiredInt32(data); + + return Status::OK(); + } - 
inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); } + inline Status ReadLong(int64_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 8)); + byte_swapper_.SwapIfRequiredInt64(data); - inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); } + return Status::OK(); + } + + inline Status WriteByte(const uint8_t data) { return WriteData(&data, 1); } - inline Status WriteShort(int16_t data) { - return WriteData((uint8_t*)&data, 2); + inline Status WriteShort(const int16_t data) { + int16_t tmp = data; + byte_swapper_.SwapIfRequiredInt16(&tmp); + return WriteData((uint8_t *)&tmp, 2); } - inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); } + inline Status WriteInt(const int32_t data) { + int32_t tmp = data; + byte_swapper_.SwapIfRequiredInt32(&tmp); + return WriteData((uint8_t *)&tmp, 4); + } - inline Status WriteLong(int64_t data) { - return WriteData((uint8_t*)&data, 8); + inline Status WriteLong(const int64_t data) { + int64_t tmp = data; + byte_swapper_.SwapIfRequiredInt64(&tmp); + return WriteData((uint8_t *)&tmp, 8); } + + private: + const ByteSwapper byte_swapper_; }; } // namespace tensorflow -#endif +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc index f25f8a5b18..c4a7d3c513 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -13,40 +13,41 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_dataset_iterator.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { -IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name, - std::string host, int32 port, bool local, - int32 part, int32 page_size, std::string username, - std::string password, std::string certfile, - std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation) +IgniteDataset::IgniteDataset(OpKernelContext* ctx, string cache_name, + string host, int32 port, bool local, int32 part, + int32 page_size, string username, string password, + string certfile, string keyfile, + string cert_password, std::vector schema, + std::vector permutation, + DataTypeVector dtypes, + std::vector shapes) : DatasetBase(DatasetContext(ctx)), - cache_name_(cache_name), - host_(host), + cache_name_(std::move(cache_name)), + host_(std::move(host)), port_(port), local_(local), part_(part), page_size_(page_size), - username_(username), - password_(password), - certfile_(certfile), - keyfile_(keyfile), - cert_password_(cert_password), - schema_(schema), - permutation_(permutation) { - SchemaToTypes(); - SchemaToShapes(); - - LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name - << "', host='" << host << "', port=" << port << ", local=" << local - << ", part=" << part << ", page_size=" << page_size - << ", username='" << username << "', certfile='" << certfile - << "', keyfile='" << keyfile + "']"; + username_(std::move(username)), + password_(std::move(password)), + certfile_(std::move(certfile)), + keyfile_(std::move(keyfile)), + cert_password_(std::move(cert_password)), + schema_(std::move(schema)), + permutation_(std::move(permutation)), + dtypes_(dtypes), + shapes_(shapes) { + LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name_ + << "', host='" << 
host_ << "', port=" << port_ + << ", local=" << local_ << ", part=" << part_ + << ", page_size=" << page_size_ << ", username='" << username_ + << "', certfile='" << certfile_ << "', keyfile='" + << keyfile_ + "']"; } IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } @@ -54,10 +55,12 @@ IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } std::unique_ptr IgniteDataset::MakeIteratorInternal( const string& prefix) const { return std::unique_ptr(new IgniteDatasetIterator( - {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_, - this->cache_name_, this->local_, this->part_, this->page_size_, - this->username_, this->password_, this->certfile_, this->keyfile_, - this->cert_password_, this->schema_, this->permutation_)); + {this, strings::StrCat(prefix, "::Ignite")}, std::move(this->host_), + this->port_, std::move(this->cache_name_), this->local_, this->part_, + this->page_size_, std::move(this->username_), std::move(this->password_), + std::move(this->certfile_), std::move(this->keyfile_), + std::move(this->cert_password_), std::move(this->schema_), + std::move(this->permutation_))); } const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; } @@ -75,42 +78,4 @@ Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx, "IgniteDataset does not support 'AsGraphDefInternal'"); } -void IgniteDataset::SchemaToTypes() { - for (auto e : schema_) { - if (e == BYTE || e == BYTE_ARR) { - dtypes_.push_back(DT_UINT8); - } else if (e == SHORT || e == SHORT_ARR) { - dtypes_.push_back(DT_INT16); - } else if (e == INT || e == INT_ARR) { - dtypes_.push_back(DT_INT32); - } else if (e == LONG || e == LONG_ARR) { - dtypes_.push_back(DT_INT64); - } else if (e == FLOAT || e == FLOAT_ARR) { - dtypes_.push_back(DT_FLOAT); - } else if (e == DOUBLE || e == DOUBLE_ARR) { - dtypes_.push_back(DT_DOUBLE); - } else if (e == UCHAR || e == UCHAR_ARR) { - dtypes_.push_back(DT_UINT8); - } else if (e == 
BOOL || e == BOOL_ARR) { - dtypes_.push_back(DT_BOOL); - } else if (e == STRING || e == STRING_ARR) { - dtypes_.push_back(DT_STRING); - } else { - LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; - } - } -} - -void IgniteDataset::SchemaToShapes() { - for (auto e : schema_) { - if (e >= 1 && e < 10) { - shapes_.push_back(PartialTensorShape({})); - } else if (e >= 12 && e < 21) { - shapes_.push_back(PartialTensorShape({-1})); - } else { - LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; - } - } -} - } // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h index d3fec5910b..66bfdf2e2a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -13,18 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ + #include "tensorflow/core/framework/dataset.h" namespace tensorflow { class IgniteDataset : public DatasetBase { public: - IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host, + IgniteDataset(OpKernelContext* ctx, string cache_name, string host, int32 port, bool local, int32 part, int32 page_size, - std::string username, std::string password, - std::string certfile, std::string keyfile, - std::string cert_password, std::vector schema, - std::vector permutation); + string username, string password, string certfile, + string keyfile, string cert_password, std::vector schema, + std::vector permutation, DataTypeVector dtypes, + std::vector shapes); ~IgniteDataset(); std::unique_ptr MakeIteratorInternal( const string& prefix) const override; @@ -38,25 +41,23 @@ class IgniteDataset : public DatasetBase { Node** 
output) const override; private: - const std::string cache_name_; - const std::string host_; + const string cache_name_; + const string host_; const int32 port_; const bool local_; const int32 part_; const int32 page_size_; - const std::string username_; - const std::string password_; - const std::string certfile_; - const std::string keyfile_; - const std::string cert_password_; + const string username_; + const string password_; + const string certfile_; + const string keyfile_; + const string cert_password_; const std::vector schema_; const std::vector permutation_; - - DataTypeVector dtypes_; - std::vector shapes_; - - void SchemaToTypes(); - void SchemaToShapes(); + const DataTypeVector dtypes_; + const std::vector shapes_; }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc index 1774585ecd..f68ded5a3a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "ignite_dataset_iterator.h" - -#include "ignite_plain_client.h" -#include "ignite_ssl_wrapper.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h" +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/logging.h" #include @@ -25,30 +25,31 @@ limitations under the License. 
namespace tensorflow { IgniteDatasetIterator::IgniteDatasetIterator( - const Params& params, std::string host, int32 port, std::string cache_name, - bool local, int32 part, int32 page_size, std::string username, - std::string password, std::string certfile, std::string keyfile, - std::string cert_password, std::vector schema, - std::vector permutation) + const Params& params, string host, int32 port, string cache_name, + bool local, int32 part, int32 page_size, string username, string password, + string certfile, string keyfile, string cert_password, + std::vector schema, std::vector permutation) : DatasetIterator(params), - cache_name_(cache_name), + cache_name_(std::move(cache_name)), local_(local), part_(part), page_size_(page_size), - username_(username), - password_(password), - schema_(schema), - permutation_(permutation), + username_(std::move(username)), + password_(std::move(password)), + schema_(std::move(schema)), + permutation_(std::move(permutation)), remainder_(-1), cursor_id_(-1), - last_page_(false) { - Client* p_client = new PlainClient(host, port); + last_page_(false), + valid_state_(true) { + Client* p_client = new PlainClient(std::move(host), port, false); if (certfile.empty()) client_ = std::unique_ptr(p_client); else - client_ = std::unique_ptr(new SslWrapper( - std::unique_ptr(p_client), certfile, keyfile, cert_password)); + client_ = std::unique_ptr( + new SslWrapper(std::unique_ptr(p_client), std::move(certfile), + std::move(keyfile), std::move(cert_password), false)); LOG(INFO) << "Ignite Dataset Iterator created"; } @@ -60,12 +61,80 @@ IgniteDatasetIterator::~IgniteDatasetIterator() { LOG(INFO) << "Ignite Dataset Iterator destroyed"; } +Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) { + mutex_lock l(mutex_); + + if (valid_state_) { + Status status = + GetNextInternalWithValidState(ctx, out_tensors, end_of_sequence); + + if (!status.ok()) valid_state_ = false; + + 
return status; + } + + return errors::Unknown("Iterator is invalid"); +} + +Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { + return errors::Unimplemented( + "Iterator for IgniteDataset does not support 'SaveInternal'"); +} + +Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) { + return errors::Unimplemented( + "Iterator for IgniteDataset does not support 'RestoreInternal')"); +} + +Status IgniteDatasetIterator::GetNextInternalWithValidState( + IteratorContext* ctx, std::vector* out_tensors, + bool* end_of_sequence) { + if (remainder_ == 0 && last_page_) { + cursor_id_ = -1; + *end_of_sequence = true; + + return Status::OK(); + } else { + TF_RETURN_IF_ERROR(EstablishConnection()); + + if (remainder_ == -1) { + TF_RETURN_IF_ERROR(ScanQuery()); + } else if (remainder_ == 0) { + TF_RETURN_IF_ERROR(LoadNextPage()); + } + + uint8_t* initial_ptr = ptr_; + std::vector tensors; + std::vector types; + + TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types)); // Parse key + TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types)); // Parse val + + remainder_ -= (ptr_ - initial_ptr); + + TF_RETURN_IF_ERROR(CheckTypes(types)); + + for (size_t i = 0; i < tensors.size(); i++) + out_tensors->push_back(tensors[permutation_[i]]); + + *end_of_sequence = false; + + return Status::OK(); + } + + *end_of_sequence = true; + + return Status::OK(); +} + Status IgniteDatasetIterator::EstablishConnection() { if (!client_->IsConnected()) { - Status status = client_->Connect(); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(client_->Connect()); - status = Handshake(); + Status status = Handshake(); if (!status.ok()) { Status disconnect_status = client_->Disconnect(); if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); @@ -79,19 +148,17 @@ Status IgniteDatasetIterator::EstablishConnection() { Status IgniteDatasetIterator::CloseConnection() { if (cursor_id_ != -1 && !last_page_) { - 
Status conn_status = EstablishConnection(); - if (!conn_status.ok()) return conn_status; + TF_RETURN_IF_ERROR(EstablishConnection()); - TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length - TF_RETURN_IF_ERROR( - client_->WriteShort(close_connection_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteInt(kCloseConnectionReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kCloseConnectionOpcode)); TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Resource ID int32_t res_len; TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); - if (res_len < 12) - return errors::Internal("Close Resource Response is corrupted"); + if (res_len < kMinResLength) + return errors::Unknown("Close Resource Response is corrupted"); int64_t req_id; TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); @@ -100,22 +167,21 @@ Status IgniteDatasetIterator::CloseConnection() { if (status != 0) { uint8_t err_msg_header; TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); - if (err_msg_header == string_val) { + if (err_msg_header == kStringVal) { int32_t err_msg_length; TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); - std::string err_msg((char*)err_msg_c, err_msg_length); - delete[] err_msg_c; + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); - return errors::Internal("Close Resource Error [status=", status, - ", message=", err_msg, "]"); + return errors::Unknown("Close Resource Error [status=", status, + ", message=", err_msg, "]"); } - return errors::Internal("Close Resource Error [status=", status, "]"); + return errors::Unknown("Close Resource Error [status=", status, "]"); } - LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - cursor_id_ = -1; return client_->Disconnect(); @@ -126,94 +192,43 @@ Status 
IgniteDatasetIterator::CloseConnection() { return client_->IsConnected() ? client_->Disconnect() : Status::OK(); } -Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) { - if (remainder_ == 0 && last_page_) { - LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - - cursor_id_ = -1; - *end_of_sequence = true; - return Status::OK(); - } else { - Status status = EstablishConnection(); - if (!status.ok()) return status; - - if (remainder_ == -1 || remainder_ == 0) { - Status status = remainder_ == -1 ? ScanQuery() : LoadNextPage(); - if (!status.ok()) return status; - } - - uint8_t* initial_ptr = ptr_; - std::vector types; - std::vector tensors; - - status = parser_.Parse(&ptr_, &tensors, &types); // Parse key - if (!status.ok()) return status; - - status = parser_.Parse(&ptr_, &tensors, &types); // Parse val - if (!status.ok()) return status; - - remainder_ -= (ptr_ - initial_ptr); - - out_tensors->resize(tensors.size()); - for (int32_t i = 0; i < tensors.size(); i++) - (*out_tensors)[permutation_[i]] = std::move(tensors[i]); - - *end_of_sequence = false; - return Status::OK(); - } - - *end_of_sequence = true; - return Status::OK(); -} - -Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { - return errors::Unimplemented( - "Iterator for IgniteDataset does not support 'SaveInternal'"); -} - -Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) { - return errors::Unimplemented( - "Iterator for IgniteDataset does not support 'RestoreInternal')"); -} - Status IgniteDatasetIterator::Handshake() { - int32_t msg_len = 8; + int32_t msg_len = kHandshakeReqDefaultLength; if (username_.empty()) msg_len += 1; else - msg_len += 5 + username_.length(); + msg_len += 5 + username_.length(); // 1 byte header, 4 bytes length. 
if (password_.empty()) msg_len += 1; else - msg_len += 5 + password_.length(); + msg_len += 5 + password_.length(); // 1 byte header, 4 bytes length. TF_RETURN_IF_ERROR(client_->WriteInt(msg_len)); TF_RETURN_IF_ERROR(client_->WriteByte(1)); - TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version)); - TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version)); - TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMajorVersion)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMinorVersion)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolPatchVersion)); TF_RETURN_IF_ERROR(client_->WriteByte(2)); if (username_.empty()) { - TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); } else { - TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal)); TF_RETURN_IF_ERROR(client_->WriteInt(username_.length())); TF_RETURN_IF_ERROR( - client_->WriteData((uint8_t*)username_.c_str(), username_.length())); + client_->WriteData(reinterpret_cast(username_.c_str()), + username_.length())); } if (password_.empty()) { - TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); } else { - TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal)); TF_RETURN_IF_ERROR(client_->WriteInt(password_.length())); TF_RETURN_IF_ERROR( - client_->WriteData((uint8_t*)password_.c_str(), password_.length())); + client_->WriteData(reinterpret_cast(password_.c_str()), + password_.length())); } int32_t handshake_res_len; @@ -221,9 +236,6 @@ Status IgniteDatasetIterator::Handshake() { uint8_t handshake_res; TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res)); - LOG(INFO) << "Handshake length " << handshake_res_len << ", res " - << (int16_t)handshake_res; - if (handshake_res != 1) { int16_t serv_ver_major; 
TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major)); @@ -234,26 +246,26 @@ Status IgniteDatasetIterator::Handshake() { uint8_t header; TF_RETURN_IF_ERROR(client_->ReadByte(&header)); - if (header == string_val) { + if (header == kStringVal) { int32_t length; TF_RETURN_IF_ERROR(client_->ReadInt(&length)); + uint8_t* err_msg_c = new uint8_t[length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length)); - std::string err_msg((char*)err_msg_c, length); - delete[] err_msg_c; - - return errors::Internal("Handshake Error [result=", handshake_res, - ", version=", serv_ver_major, ".", serv_ver_minor, - ".", serv_ver_patch, ", message='", err_msg, - "']"); - } else if (header == null_val) { - return errors::Internal("Handshake Error [result=", handshake_res, - ", version=", serv_ver_major, ".", serv_ver_minor, - ".", serv_ver_patch, "]"); + string err_msg(reinterpret_cast(err_msg_c), length); + + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, ", message='", err_msg, "']"); + } else if (header == kNullVal) { + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } else { - return errors::Internal("Handshake Error [result=", handshake_res, - ", version=", serv_ver_major, ".", serv_ver_minor, - ".", serv_ver_patch, "]"); + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } } @@ -261,31 +273,26 @@ Status IgniteDatasetIterator::Handshake() { } Status IgniteDatasetIterator::ScanQuery() { - TF_RETURN_IF_ERROR(client_->WriteInt(25)); // Message length - TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode)); // Operation code - TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + 
TF_RETURN_IF_ERROR(client_->WriteInt(kScanQueryReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kScanQueryOpcode)); + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID TF_RETURN_IF_ERROR( client_->WriteInt(JavaHashCode(cache_name_))); // Cache name TF_RETURN_IF_ERROR(client_->WriteByte(0)); // Flags - TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); // Filter object + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); // Filter object TF_RETURN_IF_ERROR(client_->WriteInt(page_size_)); // Cursor page size TF_RETURN_IF_ERROR(client_->WriteInt(part_)); // part_ition to query TF_RETURN_IF_ERROR(client_->WriteByte(local_)); // local_ flag - int64_t wait_start = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - + uint64 wait_start = Env::Default()->NowMicros(); int32_t res_len; TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + int64_t wait_stop = Env::Default()->NowMicros(); - int64_t wait_stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) / 1000 << " ms"; - LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms"; - - if (res_len < 12) return errors::Internal("Scan Query Response is corrupted"); + if (res_len < kMinResLength) + return errors::Unknown("Scan Query Response is corrupted"); int64_t req_id; TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); @@ -297,78 +304,47 @@ Status IgniteDatasetIterator::ScanQuery() { uint8_t err_msg_header; TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); - if (err_msg_header == string_val) { + if (err_msg_header == kStringVal) { int32_t err_msg_length; TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); - std::string err_msg((char*)err_msg_c, err_msg_length); - 
delete[] err_msg_c; + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); - return errors::Internal("Scan Query Error [status=", status, ", message=", - err_msg, "]"); + return errors::Unknown("Scan Query Error [status=", status, ", message=", + err_msg, "]"); } - return errors::Internal("Scan Query Error [status=", status, "]"); + return errors::Unknown("Scan Query Error [status=", status, "]"); } TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_)); - LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened"; - int32_t row_cnt; TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder_ = res_len - 25; - page_ = std::unique_ptr(new uint8_t[remainder_]); - ptr_ = page_.get(); - - int64_t start = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - - TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); - - int64_t stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - ; - - double size_in_mb = 1.0 * remainder_ / 1024 / 1024; - double time_in_s = 1.0 * (stop - start) / 1000; - LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 - << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; - - uint8_t last_page_b; - TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); - - last_page_ = !last_page_b; + int32_t page_size = res_len - kScanQueryResHeaderLength; - return Status::OK(); + return ReceivePage(page_size); } Status IgniteDatasetIterator::LoadNextPage() { - TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length - TF_RETURN_IF_ERROR( - client_->WriteShort(load_next_page_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteInt(kLoadNextPageReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kLoadNextPageOpcode)); TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Cursor ID - int64_t wait_start = std::chrono::duration_cast( - 
std::chrono::system_clock::now().time_since_epoch()) - .count(); - + uint64 wait_start = Env::Default()->NowMicros(); int32_t res_len; TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + uint64 wait_stop = Env::Default()->NowMicros(); - int64_t wait_stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) / 1000 + << " ms"; - LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms"; - - if (res_len < 12) - return errors::Internal("Load Next Page Response is corrupted"); + if (res_len < kMinResLength) + return errors::Unknown("Load Next Page Response is corrupted"); int64_t req_id; TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); @@ -380,41 +356,40 @@ Status IgniteDatasetIterator::LoadNextPage() { uint8_t err_msg_header; TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); - if (err_msg_header == string_val) { + if (err_msg_header == kStringVal) { int32_t err_msg_length; TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); - std::string err_msg((char*)err_msg_c, err_msg_length); - delete[] err_msg_c; + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); - return errors::Internal("Load Next Page Error [status=", status, - ", message=", err_msg, "]"); + return errors::Unknown("Load Next Page Error [status=", status, + ", message=", err_msg, "]"); } - return errors::Internal("Load Next Page Error [status=", status, "]"); + return errors::Unknown("Load Next Page Error [status=", status, "]"); } int32_t row_cnt; TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder_ = res_len - 17; + int32_t page_size = res_len - kLoadNextPageResHeaderLength; + + return ReceivePage(page_size); +} + +Status IgniteDatasetIterator::ReceivePage(int32_t page_size) 
{ + remainder_ = page_size; page_ = std::unique_ptr(new uint8_t[remainder_]); ptr_ = page_.get(); - int64_t start = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - + uint64 start = Env::Default()->NowMicros(); TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); - - int64_t stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - ; + uint64 stop = Env::Default()->NowMicros(); double size_in_mb = 1.0 * remainder_ / 1024 / 1024; - double time_in_s = 1.0 * (stop - start) / 1000; + double time_in_s = 1.0 * (stop - start) / 1000 / 1000; LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; @@ -426,7 +401,19 @@ Status IgniteDatasetIterator::LoadNextPage() { return Status::OK(); } -int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const { +Status IgniteDatasetIterator::CheckTypes(const std::vector& types) { + if (schema_.size() != types.size()) + return errors::Unknown("Object has unexpected schema"); + + for (size_t i = 0; i < schema_.size(); i++) { + if (schema_[i] != types[permutation_[i]]) + return errors::Unknown("Object has unexpected schema"); + } + + return Status::OK(); +} + +int32_t IgniteDatasetIterator::JavaHashCode(string str) const { int32_t h = 0; for (char& c : str) { h = 31 * h + c; diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h index 5858dbfcb9..c499e2c9cc 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -13,19 +13,22 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_binary_object_parser.h" -#include "ignite_client.h" -#include "ignite_dataset.h" +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" +#include "tensorflow/core/platform/mutex.h" namespace tensorflow { class IgniteDatasetIterator : public DatasetIterator { public: - IgniteDatasetIterator(const Params& params, std::string host, int32 port, - std::string cache_name, bool local, int32 part, - int32 page_size, std::string username, - std::string password, std::string certfile, - std::string keyfile, std::string cert_password, + IgniteDatasetIterator(const Params& params, string host, int32 port, + string cache_name, bool local, int32 part, + int32 page_size, string username, string password, + string certfile, string keyfile, string cert_password, std::vector schema, std::vector permutation); ~IgniteDatasetIterator(); @@ -38,15 +41,28 @@ class IgniteDatasetIterator : public DatasetIterator { IteratorStateReader* reader) override; private: + Status GetNextInternalWithValidState(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence); + + Status EstablishConnection(); + Status CloseConnection(); + Status Handshake(); + Status ScanQuery(); + Status LoadNextPage(); + Status ReceivePage(int32_t page_size); + Status CheckTypes(const std::vector& types); + int32_t JavaHashCode(string str) const; + std::unique_ptr client_; BinaryObjectParser parser_; - const std::string cache_name_; + const string cache_name_; const bool local_; const int32 part_; const int32 page_size_; - const std::string username_; - const std::string password_; + const string username_; + const string 
password_; const std::vector schema_; const std::vector permutation_; @@ -54,24 +70,30 @@ class IgniteDatasetIterator : public DatasetIterator { int64_t cursor_id_; bool last_page_; + bool valid_state_; + + mutex mutex_; + std::unique_ptr page_; uint8_t* ptr_; - - Status EstablishConnection(); - Status CloseConnection(); - Status Handshake(); - Status ScanQuery(); - Status LoadNextPage(); - int32_t JavaHashCode(std::string str) const; }; -constexpr uint8_t null_val = 101; -constexpr uint8_t string_val = 9; -constexpr uint8_t protocol_major_version = 1; -constexpr uint8_t protocol_minor_version = 1; -constexpr uint8_t protocol_patch_version = 0; -constexpr int16_t scan_query_opcode = 2000; -constexpr int16_t load_next_page_opcode = 2001; -constexpr int16_t close_connection_opcode = 0; +constexpr uint8_t kNullVal = 101; +constexpr uint8_t kStringVal = 9; +constexpr uint8_t kProtocolMajorVersion = 1; +constexpr uint8_t kProtocolMinorVersion = 1; +constexpr uint8_t kProtocolPatchVersion = 0; +constexpr int16_t kScanQueryOpcode = 2000; +constexpr int16_t kLoadNextPageOpcode = 2001; +constexpr int16_t kCloseConnectionOpcode = 0; +constexpr int32_t kScanQueryReqLength = 25; +constexpr int32_t kScanQueryResHeaderLength = 25; +constexpr int32_t kLoadNextPageReqLength = 18; +constexpr int32_t kLoadNextPageResHeaderLength = 17; +constexpr int32_t kCloseConnectionReqLength = 18; +constexpr int32_t kHandshakeReqDefaultLength = 8; +constexpr int32_t kMinResLength = 12; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index d03404a460..eeb29ef30b 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -13,29 +13,73 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" #include -#include "ignite_dataset.h" +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" #include "tensorflow/core/framework/dataset.h" namespace tensorflow { namespace { +Status SchemaToTypes(const std::vector& schema, DataTypeVector* dtypes) { + for (auto e : schema) { + if (e == BYTE || e == BYTE_ARR) { + dtypes->push_back(DT_UINT8); + } else if (e == SHORT || e == SHORT_ARR) { + dtypes->push_back(DT_INT16); + } else if (e == INT || e == INT_ARR) { + dtypes->push_back(DT_INT32); + } else if (e == LONG || e == LONG_ARR) { + dtypes->push_back(DT_INT64); + } else if (e == FLOAT || e == FLOAT_ARR) { + dtypes->push_back(DT_FLOAT); + } else if (e == DOUBLE || e == DOUBLE_ARR) { + dtypes->push_back(DT_DOUBLE); + } else if (e == USHORT || e == USHORT_ARR) { + dtypes->push_back(DT_UINT8); + } else if (e == BOOL || e == BOOL_ARR) { + dtypes->push_back(DT_BOOL); + } else if (e == STRING || e == STRING_ARR) { + dtypes->push_back(DT_STRING); + } else { + return errors::Unknown("Unexpected type in schema [type_id=", e, "]"); + } + } + + return Status::OK(); +} + +Status SchemaToShapes(const std::vector& schema, + std::vector* shapes) { + for (auto e : schema) { + if (e >= 1 && e < 10) { + shapes->push_back(PartialTensorShape({})); + } else if (e >= 12 && e < 21) { + shapes->push_back(PartialTensorShape({-1})); + } else { + return errors::Unknown("Unexpected type in schema [type_id=", e, "]"); + } + } + + return Status::OK(); +} + class IgniteDatasetOp : public DatasetOpKernel { public: using DatasetOpKernel::DatasetOpKernel; void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { - std::string cache_name = ""; - std::string host = ""; + string cache_name = ""; + string host = ""; int32 port = -1; bool local = false; int32 part = -1; int32 page_size = -1; - std::string username = ""; - 
std::string password = ""; - std::string certfile = ""; - std::string keyfile = ""; - std::string cert_password = ""; + string username = ""; + string password = ""; + string certfile = ""; + string keyfile = ""; + string cert_password = ""; const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME"); const char* env_host = std::getenv("IGNITE_DATASET_HOST"); @@ -50,15 +94,15 @@ class IgniteDatasetOp : public DatasetOpKernel { const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD"); if (env_cache_name) - cache_name = std::string(env_cache_name); + cache_name = string(env_cache_name); else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cache_name", - &cache_name)); + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "cache_name", &cache_name)); if (env_host) - host = std::string(env_host); + host = string(env_host); else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); if (env_port) port = atoi(env_port); @@ -82,34 +126,34 @@ class IgniteDatasetOp : public DatasetOpKernel { ParseScalarArgument(ctx, "page_size", &page_size)); if (env_username) - username = std::string(env_username); + username = string(env_username); else - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "username", &username)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "username", &username)); if (env_password) - password = std::string(env_password); + password = string(env_password); else - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "password", &password)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "password", &password)); if (env_certfile) - certfile = std::string(env_certfile); + certfile = string(env_certfile); else - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "certfile", &certfile)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "certfile", &certfile)); if (env_keyfile) - keyfile = std::string(env_keyfile); + keyfile = string(env_keyfile); else - OP_REQUIRES_OK( 
- ctx, ParseScalarArgument(ctx, "keyfile", &keyfile)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "keyfile", &keyfile)); if (env_cert_password) - cert_password = std::string(env_cert_password); + cert_password = string(env_cert_password); else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", - &cert_password)); + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", + &cert_password)); const Tensor* schema_tensor; OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); @@ -124,19 +168,28 @@ class IgniteDatasetOp : public DatasetOpKernel { const Tensor* permutation_tensor; OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor)); - OP_REQUIRES(ctx, schema_tensor->dims() == 1, + OP_REQUIRES(ctx, permutation_tensor->dims() == 1, errors::InvalidArgument("`permutation` must be a vector.")); std::vector permutation; - permutation.reserve(permutation_tensor->NumElements()); + permutation.resize(permutation_tensor->NumElements()); for (int i = 0; i < permutation_tensor->NumElements(); i++) { - permutation.push_back(permutation_tensor->flat()(i)); + // Inversed permutation. 
+ permutation[permutation_tensor->flat()(i)] = i; } - *output = - new IgniteDataset(ctx, cache_name, host, port, local, part, page_size, - username, password, certfile, keyfile, cert_password, - std::move(schema), std::move(permutation)); + DataTypeVector dtypes; + std::vector shapes; + + OP_REQUIRES_OK(ctx, SchemaToTypes(schema, &dtypes)); + OP_REQUIRES_OK(ctx, SchemaToShapes(schema, &shapes)); + + *output = new IgniteDataset( + ctx, std::move(cache_name), std::move(host), port, local, part, + page_size, std::move(username), std::move(password), + std::move(certfile), std::move(keyfile), std::move(cert_password), + std::move(schema), std::move(permutation), std::move(dtypes), + std::move(shapes)); } }; diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h index 6f417a3cb5..750ebe605a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -13,28 +13,31 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_client.h" +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ -#include +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" namespace tensorflow { class PlainClient : public Client { public: - PlainClient(std::string host, int port); + PlainClient(string host, int port, bool big_endian); ~PlainClient(); virtual Status Connect(); virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, int32_t length); - virtual Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length); + virtual Status WriteData(const uint8_t* buf, const int32_t length); private: - const std::string host_; + const string host_; const int port_; int sock_; }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc index a4c58a9563..e16c92307d 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" #include #include @@ -31,8 +31,8 @@ limitations under the License. 
namespace tensorflow { -PlainClient::PlainClient(std::string host, int port) - : host_(host), port_(port), sock_(-1) {} +PlainClient::PlainClient(string host, int port, bool big_endian) + : Client(big_endian), host_(std::move(host)), port_(port), sock_(-1) {} PlainClient::~PlainClient() { if (IsConnected()) { @@ -87,7 +87,7 @@ bool PlainClient::IsConnected() { return sock_ != -1; } int PlainClient::GetSocketDescriptor() { return sock_; } -Status PlainClient::ReadData(uint8_t* buf, int32_t length) { +Status PlainClient::ReadData(uint8_t* buf, const int32_t length) { int recieved = 0; while (recieved < length) { @@ -95,7 +95,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) { if (res < 0) return errors::Internal("Error occured while reading from socket: ", res, - ", ", std::string(strerror(errno))); + ", ", string(strerror(errno))); if (res == 0) return errors::Internal("Server closed connection"); @@ -106,7 +106,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) { return Status::OK(); } -Status PlainClient::WriteData(uint8_t* buf, int32_t length) { +Status PlainClient::WriteData(const uint8_t* buf, const int32_t length) { int sent = 0; while (sent < length) { @@ -114,7 +114,7 @@ Status PlainClient::WriteData(uint8_t* buf, int32_t length) { if (res < 0) return errors::Internal("Error occured while writing into socket: ", res, - ", ", std::string(strerror(errno))); + ", ", string(strerror(errno))); sent += res; buf += res; diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 8182fde6d9..9cd08a7779 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" #define WIN32_LEAN_AND_MEAN #include @@ -29,8 +29,11 @@ limitations under the License. namespace tensorflow { -PlainClient::PlainClient(std::string host, int port) - : host_(host), port_(port), sock_(INVALID_SOCKET) {} +PlainClient::PlainClient(string host, int port, bool big_endian) + : Client(big_endian), + host_(std::move(host)), + port_(port), + sock_(INVALID_SOCKET) {} PlainClient::~PlainClient() { if (IsConnected()) { @@ -55,6 +58,8 @@ Status PlainClient::Connect() { &result); if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); + auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); }); + for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); if (sock_ == INVALID_SOCKET) { @@ -72,8 +77,6 @@ Status PlainClient::Connect() { break; } - freeaddrinfo(result); - if (sock_ == INVALID_SOCKET) { WSACleanup(); return errors::Internal("Unable to connect to server"); @@ -99,7 +102,7 @@ bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; } int PlainClient::GetSocketDescriptor() { return sock_; } -Status PlainClient::ReadData(uint8_t *buf, int32_t length) { +Status PlainClient::ReadData(uint8_t *buf, const int32_t length) { int recieved = 0; while (recieved < length) { @@ -117,7 +120,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) { return Status::OK(); } -Status PlainClient::WriteData(uint8_t *buf, int32_t length) { +Status PlainClient::WriteData(const uint8_t *buf, const int32_t length) { int sent = 0; while (sent < length) { diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc index a2bc6b9609..28db509eaa 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc +++ 
b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "ignite_ssl_wrapper.h" +#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" @@ -29,13 +29,15 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) { return (strlen(buf)); } -SslWrapper::SslWrapper(std::shared_ptr client, std::string certfile, - std::string keyfile, std::string cert_password) - : client_(client), - certfile_(certfile), - keyfile_(keyfile), - cert_password_(cert_password), - ctx_(NULL) {} +SslWrapper::SslWrapper(std::shared_ptr client, string certfile, + string keyfile, string cert_password, bool big_endian) + : Client(big_endian), + client_(client), + certfile_(std::move(certfile)), + keyfile_(std::move(keyfile)), + cert_password_(std::move(cert_password)), + ctx_(nullptr), + ssl_(nullptr) {} SslWrapper::~SslWrapper() { if (IsConnected()) { @@ -43,9 +45,14 @@ SslWrapper::~SslWrapper() { if (!status.ok()) LOG(WARNING) << status.ToString(); } - if (ctx_ != NULL) { + if (ctx_ != nullptr) { SSL_CTX_free(ctx_); - ctx_ = NULL; + ctx_ = nullptr; + } + + if (ssl_ != nullptr) { + SSL_free(ssl_); + ssl_ = nullptr; } } @@ -63,7 +70,7 @@ Status SslWrapper::InitSslContext() { return errors::Internal("Couldn't load cetificate chain (file '", certfile_, "')"); - std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_; + string private_key_file = keyfile_.empty() ? 
certfile_ : keyfile_; if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(), SSL_FILETYPE_PEM) != 1) return errors::Internal("Couldn't load private key (file '", @@ -94,6 +101,7 @@ Status SslWrapper::Connect() { Status SslWrapper::Disconnect() { SSL_free(ssl_); + ssl_ = nullptr; LOG(INFO) << "SSL connection closed"; @@ -104,7 +112,7 @@ bool SslWrapper::IsConnected() { return client_->IsConnected(); } int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); } -Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { +Status SslWrapper::ReadData(uint8_t *buf, const int32_t length) { int recieved = 0; while (recieved < length) { @@ -123,7 +131,7 @@ Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { return Status::OK(); } -Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { +Status SslWrapper::WriteData(const uint8_t *buf, const int32_t length) { int sent = 0; while (sent < length) { diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h index bbba6cc181..d59ce91aba 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -13,35 +13,39 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_client.h" +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" #include -#include namespace tensorflow { class SslWrapper : public Client { public: - SslWrapper(std::shared_ptr client, std::string certfile, - std::string keyfile, std::string cert_password); + SslWrapper(std::shared_ptr client, string certfile, string keyfile, + string cert_password, bool big_endian); ~SslWrapper(); virtual Status Connect(); virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, int32_t length); - virtual Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length); + virtual Status WriteData(const uint8_t* buf, const int32_t length); private: + Status InitSslContext(); + std::shared_ptr client_; - std::string certfile_; - std::string keyfile_; - std::string cert_password_; + string certfile_; + string keyfile_; + string cert_password_; SSL_CTX* ctx_; SSL* ssl_; - - Status InitSslContext(); }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ \ No newline at end of file diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc index fb16b290b1..7d18df11aa 100644 --- a/tensorflow/contrib/ignite/ops/dataset_ops.cc +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -37,6 +37,8 @@ REGISTER_OP("IgniteDataset") .SetIsStateful() .SetShapeFn(shape_inference::ScalarShape) .Doc(R"doc( +IgniteDataset that allows to get data from Apache Ignite. + Apache Ignite is a memory-centric distributed database, caching, and processing platform for transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. 
This contrib package contains an diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py index 60003ca3b7..c0e24b1c69 100644 --- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -41,19 +41,19 @@ class Readable(): def read_byte(self): """Reads and returnes byte.""" - return self.__read("b", 1) + return self._read("b", 1) def read_short(self): """Reads and returns short (2 bytes, little-endian).""" - return self.__read("h", 2) + return self._read("h", 2) def read_int(self): """Reads and returns int (4 bytes, little-endian).""" - return self.__read("i", 4) + return self._read("i", 4) def read_long(self): """Reads and returns long (8 bytes, little-endian).""" - return self.__read("q", 8) + return self._read("q", 8) def skip(self, length): """Skips the specified number of bytes.""" @@ -64,7 +64,7 @@ class Readable(): """Reads the specified number of bytes and returns them as a buffer.""" return None - def __read(self, data_type, length): + def _read(self, data_type, length): """Reads, unpacks and returns specified type (little-endian).""" data_buffer = self.read_data(length) return struct.unpack("<" + data_type, data_buffer)[0] @@ -116,10 +116,10 @@ class TcpClient(Readable): self.sock = context.wrap_socket(self.sock) else: if keyfile is not None: - raise Exception("SSL is disabled, keyfile must not be specified \ + raise RuntimeError("SSL is disabled, keyfile must not be specified \ (to enable SSL specify certfile)") if password is not None: - raise Exception("SSL is disabled, password must not be specified \ + raise RuntimeError("SSL is disabled, password must not be specified \ (to enable SSL specify certfile)") self.host = host @@ -136,19 +136,19 @@ class TcpClient(Readable): def write_byte(self, v): """Writes the specified byte.""" - self.__write(v, "b") + self._write(v, "b") def write_short(self, v): """Writes 
the specified short (2 bytes, little-endian).""" - self.__write(v, "h") + self._write(v, "h") def write_int(self, v): """Writes the specified short (4 bytes, little-endian).""" - self.__write(v, "i") + self._write(v, "i") def write_long(self, v): """Writes the specified int (8 bytes, little-endian).""" - self.__write(v, "q") + self._write(v, "q") def write_string(self, v): """Writes the specified string.""" @@ -167,7 +167,7 @@ class TcpClient(Readable): data_buffer += buf return data_buffer - def __write(self, value, data_type): + def _write(self, value, data_type): """Packs and writes data using the specified type (little-endian).""" data_buffer = struct.pack("<" + data_type, value) self.sock.sendall(data_buffer) @@ -193,6 +193,7 @@ class BinaryField(): # Binary types defined in Apache Ignite Thin client and supported by # TensorFlow on Apache Ignite, see # https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. +# True means that type is a vector, False means type is scalar. types = { 1: (dtypes.uint8, False), 2: (dtypes.int16, False), @@ -248,13 +249,13 @@ class TypeTreeNode(): dataset. """ if self.fields is None: - object_type = types[self.type_id] - if object_type is not None: + if self.type_id in types: + object_type = types[self.type_id] is_array = object_type[1] if is_array: return tensor_shape.TensorShape([None]) return tensor_shape.TensorShape([]) - raise Exception("Unsupported type [type_id=%d]" % self.type_id) + raise ValueError("Unsupported type [type_id=%d]" % self.type_id) output_shapes = {} for field in self.fields: output_shapes[field.name] = field.to_output_shapes() @@ -265,10 +266,10 @@ class TypeTreeNode(): dataset. 
""" if self.fields is None: - object_type = types[self.type_id] - if object_type is not None: + if self.type_id in types: + object_type = types[self.type_id] return object_type[0] - raise Exception("Unsupported type [type_id=%d]" % self.type_id) + raise ValueError("Unsupported type [type_id=%d]" % self.type_id) else: output_types = {} for field in self.fields: @@ -276,11 +277,11 @@ class TypeTreeNode(): return output_types def to_flat(self): - """Returns a list of leaf node types.""" + """Returns a list of node types.""" return self.to_flat_rec([]) def to_permutation(self): - """Returns a permutation that should be applied to order object leafs.""" + """Returns a permutation that should be applied to order object leaves.""" correct_order_dict = {} self.traversal_rec(correct_order_dict, 0) object_order = [] @@ -288,9 +289,10 @@ class TypeTreeNode(): return [correct_order_dict[o] for o in object_order] def to_flat_rec(self, flat): - """Formats a list of leaf node types.""" - flat.append(self.type_id) - if self.fields is not None: + """Formats a list of leaf node types in pre-order.""" + if self.fields is None: + flat.append(self.type_id) + else: for field in self.fields: field.to_flat_rec(flat) return flat @@ -320,8 +322,8 @@ class IgniteClient(TcpClient): have the same structure (homogeneous objects) and the cache contains at least one object. """ - def __init__(self, host, port, username=None, password=None, certfile=None,\ - keyfile=None, cert_password=None): + def __init__(self, host, port, username=None, password=None, certfile=None, + keyfile=None, cert_password=None): """Constructs a new instance of IgniteClient. 
Args: @@ -385,12 +387,13 @@ class IgniteClient(TcpClient): serv_ver_major = self.read_short() serv_ver_minor = self.read_short() serv_ver_patch = self.read_short() - err_msg = self.__parse_string() + err_msg = self._parse_string() if err_msg is None: - raise Exception("Handshake Error [result=%d, version=%d.%d.%d]" \ - % (res, serv_ver_major, serv_ver_minor, serv_ver_patch)) + raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d]" + % (res, serv_ver_major, serv_ver_minor, + serv_ver_patch)) else: - raise Exception("Handshake Error [result=%d, version=%d.%d.%d, \ + raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d, \ message='%s']" % ( res, serv_ver_major, @@ -403,7 +406,7 @@ class IgniteClient(TcpClient): """Collects type information about objects stored in the specified cache. """ - cache_name_hash = self.__java_hash_code(cache_name) + cache_name_hash = self._java_hash_code(cache_name) self.write_int(25) # Message length self.write_short(2000) # Operation code self.write_long(0) # Request ID @@ -419,18 +422,18 @@ class IgniteClient(TcpClient): status = self.read_int() if status != 0: - err_msg = self.__parse_string() + err_msg = self._parse_string() if err_msg is None: - raise Exception("Scan Query Error [status=%s]" % status) + raise RuntimeError("Scan Query Error [status=%s]" % status) else: - raise Exception("Scan Query Error [status=%s, message='%s']" \ - % (status, err_msg)) + raise RuntimeError("Scan Query Error [status=%s, message='%s']" + % (status, err_msg)) self.read_long() # Cursor id row_count = self.read_int() if row_count == 0: - raise Exception("Scan Query returned empty result, so it's \ + raise RuntimeError("Scan Query returned empty result, so it's \ impossible to derive the cache type") payload = DataBuffer(self.read_data(result_length - 25)) @@ -438,20 +441,20 @@ class IgniteClient(TcpClient): self.read_byte() # Next page res = TypeTreeNode("root", 0, [ - self.__collect_types("key", payload), - 
self.__collect_types("val", payload) + self._collect_types("key", payload), + self._collect_types("val", payload) ], [0, 1]) return res - def __java_hash_code(self, s): + def _java_hash_code(self, s): """Computes hash code of the specified string using Java code.""" h = 0 for c in s: h = (31 * h + ord(c)) & 0xFFFFFFFF return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000 - def __collect_types(self, field_name, data): + def _collect_types(self, field_name, data): """Extracts type information from the specified object.""" type_id = data.read_byte() @@ -570,7 +573,7 @@ class IgniteClient(TcpClient): elif header == 101: pass else: - raise Exception("Unknown binary type when expected string \ + raise RuntimeError("Unknown binary type when expected string \ [type_id=%d]" % header) return TypeTreeNode(field_name, type_id) @@ -591,7 +594,7 @@ class IgniteClient(TcpClient): length = data.read_int() inner_data = data.read_data(length) data.read_int() # Offset - return self.__collect_types(field_name, DataBuffer(inner_data)) + return self._collect_types(field_name, DataBuffer(inner_data)) # Complex Object. 
if type_id == 103: @@ -603,11 +606,11 @@ class IgniteClient(TcpClient): data.read_int() # Object schema id obj_schema_offset = data.read_int() - obj_type = self.__get_type(obj_type_id) + obj_type = self._get_type(obj_type_id) children = [] for obj_field in obj_type.fields: - child = self.__collect_types(obj_field.field_name, data) + child = self._collect_types(obj_field.field_name, data) children.append(child) children_sorted = sorted(children, key=lambda child: child.name) @@ -618,9 +621,9 @@ class IgniteClient(TcpClient): return TypeTreeNode(field_name, type_id, children, permutation) - raise Exception("Unknown binary type [type_id=%d]" % type_id) + raise RuntimeError("Unknown binary type [type_id=%d]" % type_id) - def __get_type(self, type_id): + def _get_type(self, type_id): """Queries Apache Ignite information about type by type id.""" self.write_int(14) # Message length self.write_short(3002) # Operation code @@ -632,25 +635,25 @@ class IgniteClient(TcpClient): status = self.read_int() if status != 0: - err_msg = self.__parse_string() + err_msg = self._parse_string() if err_msg is None: - raise Exception("Get Binary Type Error [status=%d, message='%s']" \ - % (status, err_msg)) + raise RuntimeError("Get Binary Type Error [status=%d, message='%s']" + % (status, err_msg)) else: - raise Exception("Get Binary Type Error [status=%d]" % status) + raise RuntimeError("Get Binary Type Error [status=%d]" % status) binary_type_exists = self.read_byte() if binary_type_exists == 0: - raise Exception("Binary type not found [type_id=%d] " % type_id) + raise RuntimeError("Binary type not found [type_id=%d] " % type_id) binary_type_id = self.read_int() - binary_type_name = self.__parse_string() - self.__parse_string() # Affinity field name + binary_type_name = self._parse_string() + self._parse_string() # Affinity field name fields = [] for _ in range(self.read_int()): - field_name = self.__parse_string() + field_name = self._parse_string() field_type_id = self.read_int() 
field_id = self.read_int() @@ -659,7 +662,7 @@ class IgniteClient(TcpClient): is_enum = self.read_byte() if is_enum == 1: - raise Exception("Enum fields are not supported yet") + raise RuntimeError("Enum fields are not supported yet") schema_cnt = self.read_int() for _ in range(schema_cnt): @@ -669,7 +672,7 @@ class IgniteClient(TcpClient): return BinaryType(binary_type_id, binary_type_name, fields) - def __parse_string(self): + def _parse_string(self): """Parses string.""" header = self.read_byte() if header == 9: @@ -677,8 +680,8 @@ class IgniteClient(TcpClient): return self.read_data(length).decode("utf-8") if header == 101: return None - raise Exception("Unknown binary type when expected string [type_id=%d]" \ - % header) + raise RuntimeError("Unknown binary type when expected string [type_id=%d]" + % header) class IgniteDataset(Dataset): """Apache Ignite is a memory-centric distributed database, caching, and @@ -692,9 +695,9 @@ class IgniteDataset(Dataset): Ignite Binary Client Protocol. """ - def __init__(self, cache_name, host="localhost", port=10800, local=False,\ - part=-1, page_size=100, username=None, password=None, certfile=None,\ - keyfile=None, cert_password=None): + def __init__(self, cache_name, host="localhost", port=10800, local=False, + part=-1, page_size=100, username=None, password=None, + certfile=None, keyfile=None, cert_password=None): """Create a IgniteDataset. 
Args: @@ -716,39 +719,44 @@ class IgniteDataset(Dataset): """ super(IgniteDataset, self).__init__() - with IgniteClient(host, port, username, password, certfile, keyfile,\ - cert_password) as client: + with IgniteClient(host, port, username, password, certfile, keyfile, + cert_password) as client: client.handshake() self.cache_type = client.get_cache_type(cache_name) - self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,\ - name="cache_name") + self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string, + name="cache_name") self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host") self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port") self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local") self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part") - self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,\ - name="page_size") - self.username = ops.convert_to_tensor("" if username is None else username,\ - dtype=dtypes.string, name="username") - self.password = ops.convert_to_tensor("" if password is None else password,\ - dtype=dtypes.string, name="password") - self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,\ - dtype=dtypes.string, name="certfile") - self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,\ - dtype=dtypes.string, name="keyfile") - self.cert_password = ops.convert_to_tensor("" if cert_password is None\ - else cert_password, dtype=dtypes.string, name="cert_password") - self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),\ - dtype=dtypes.int32, name="schema") - self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),\ - dtype=dtypes.int32, name="permutation") + self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32, + name="page_size") + self.username = ops.convert_to_tensor("" if username is None else username, + dtype=dtypes.string, name="username") + 
self.password = ops.convert_to_tensor("" if password is None else password, + dtype=dtypes.string, name="password") + self.certfile = ops.convert_to_tensor("" if certfile is None else certfile, + dtype=dtypes.string, name="certfile") + self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile, + dtype=dtypes.string, name="keyfile") + self.cert_password = ops.convert_to_tensor("" if cert_password is None + else cert_password, + dtype=dtypes.string, + name="cert_password") + self.schema = ops.convert_to_tensor(self.cache_type.to_flat(), + dtype=dtypes.int32, name="schema") + self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(), + dtype=dtypes.int32, + name="permutation") def _as_variant_tensor(self): - return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,\ - self.port, self.local, self.part, self.page_size, self.username,\ - self.password, self.certfile, self.keyfile, self.cert_password,\ - self.schema, self.permutation) + return gen_dataset_ops.ignite_dataset(self.cache_name, self.host, + self.port, self.local, self.part, + self.page_size, self.username, + self.password, self.certfile, + self.keyfile, self.cert_password, + self.schema, self.permutation) @property def output_classes(self): -- GitLab From 49410e6bbed9020d5705303a533d43312c46f886 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 12 Sep 2018 17:42:46 +0000 Subject: [PATCH 0118/1357] Fix pylint error Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/tensor_array_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index b47e750f4b..0ad2063558 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -1506,7 +1506,7 @@ class TensorArrayTest(test.TestCase): def testTensorArrayInt64GPU(self): if not test.is_gpu_available(): - return + 
return with self.test_session(use_gpu=True, force_gpu=True) as sess: value = array_ops.placeholder(dtypes.int64) ta = tensor_array_ops.TensorArray(dtype=dtypes.int64, size=2) -- GitLab From f832a9b3743fbb160eff5e9775457b4769ea2e81 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 12 Sep 2018 12:49:41 -0700 Subject: [PATCH 0119/1357] Update RELEASE.md --- RELEASE.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 763ef3b279..bdc23795e5 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,9 @@ +# Release 1.10.1 +## Bug Fixes and Other Changes + +* `tf.keras`: + * Fixing keras on Cloud TPUs. No new binaries will be built for Windows. + # Release 1.10.0 ## Major Features And Improvements -- GitLab From 626bc997c28e1dfeaa85041e6c5a057fec7e0a02 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 00:05:23 -0700 Subject: [PATCH 0120/1357] Move from deprecated self.test_session() to self.cached_session(). self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about: * the fact that the session may be reused. * the session is not closed even when doing a "with self.test_session()" statement. 
PiperOrigin-RevId: 212766976 --- .../python/kernel_tests/accumulate_n_test.py | 12 +- .../python/kernel_tests/ackermann_test.py | 2 +- .../python/kernel_tests/argmax_op_test.py | 6 +- .../python/kernel_tests/array_ops_test.py | 56 ++-- .../python/kernel_tests/as_string_op_test.py | 12 +- .../kernel_tests/atrous_convolution_test.py | 2 +- .../python/kernel_tests/attention_ops_test.py | 4 +- .../python/kernel_tests/barrier_ops_test.py | 32 +-- .../python/kernel_tests/base64_ops_test.py | 6 +- .../python/kernel_tests/basic_gpu_test.py | 4 +- .../kernel_tests/batch_gather_op_test.py | 2 +- .../kernel_tests/batchtospace_op_test.py | 6 +- .../python/kernel_tests/bcast_ops_test.py | 4 +- .../python/kernel_tests/betainc_op_test.py | 12 +- .../python/kernel_tests/bincount_op_test.py | 2 +- .../candidate_sampler_ops_test.py | 12 +- .../python/kernel_tests/cast_op_test.py | 10 +- .../kernel_tests/checkpoint_ops_test.py | 32 +-- .../python/kernel_tests/clip_ops_test.py | 4 +- .../python/kernel_tests/concat_op_test.py | 28 +- .../python/kernel_tests/cond_v2_test.py | 4 +- .../conditional_accumulator_test.py | 38 +-- .../kernel_tests/confusion_matrix_test.py | 28 +- .../python/kernel_tests/constant_op_test.py | 52 ++-- .../kernel_tests/control_flow_ops_py_test.py | 248 ++++++++--------- tensorflow/python/kernel_tests/conv1d_test.py | 2 +- .../conv2d_backprop_filter_grad_test.py | 2 +- .../kernel_tests/conv2d_transpose_test.py | 8 +- .../conv3d_backprop_filter_v2_grad_test.py | 2 +- .../kernel_tests/conv3d_transpose_test.py | 10 +- .../python/kernel_tests/conv_ops_3d_test.py | 4 +- .../python/kernel_tests/conv_ops_test.py | 4 +- .../python/kernel_tests/cross_grad_test.py | 2 +- .../python/kernel_tests/cwise_ops_test.py | 56 ++-- .../python/kernel_tests/decode_bmp_op_test.py | 4 +- .../kernel_tests/decode_compressed_op_test.py | 4 +- .../python/kernel_tests/decode_csv_op_test.py | 2 +- .../kernel_tests/decode_image_op_test.py | 2 +- .../python/kernel_tests/decode_png_op_test.py | 
2 +- .../python/kernel_tests/decode_raw_op_test.py | 12 +- .../dense_update_ops_no_tsan_test.py | 8 +- .../kernel_tests/dense_update_ops_test.py | 6 +- .../kernel_tests/division_future_test.py | 2 +- .../python/kernel_tests/division_past_test.py | 2 +- .../python/kernel_tests/duplicate_op_test.py | 2 +- .../kernel_tests/dynamic_partition_op_test.py | 8 +- .../kernel_tests/dynamic_stitch_op_test.py | 4 +- .../python/kernel_tests/embedding_ops_test.py | 60 ++-- .../extract_image_patches_grad_test.py | 2 +- .../python/kernel_tests/fft_ops_test.py | 4 +- .../python/kernel_tests/fifo_queue_test.py | 128 ++++----- .../fractional_avg_pool_op_test.py | 18 +- .../fractional_max_pool_op_test.py | 18 +- .../python/kernel_tests/gather_op_test.py | 4 +- .../kernel_tests/gradient_correctness_test.py | 8 +- .../kernel_tests/identity_n_op_py_test.py | 8 +- .../kernel_tests/identity_op_py_test.py | 10 +- .../python/kernel_tests/in_topk_op_test.py | 6 +- .../python/kernel_tests/init_ops_test.py | 2 +- .../python/kernel_tests/inplace_ops_test.py | 2 +- tensorflow/python/kernel_tests/io_ops_test.py | 8 +- .../python/kernel_tests/linalg_grad_test.py | 2 +- .../python/kernel_tests/linalg_ops_test.py | 2 +- .../python/kernel_tests/listdiff_op_test.py | 2 +- .../python/kernel_tests/logging_ops_test.py | 4 +- .../python/kernel_tests/lookup_ops_test.py | 156 +++++------ tensorflow/python/kernel_tests/losses_test.py | 216 +++++++-------- .../python/kernel_tests/manip_ops_test.py | 16 +- .../python/kernel_tests/matmul_op_test.py | 2 +- .../kernel_tests/matrix_inverse_op_test.py | 2 +- .../matrix_triangular_solve_op_test.py | 6 +- .../python/kernel_tests/metrics_test.py | 258 +++++++++--------- tensorflow/python/kernel_tests/pad_op_test.py | 2 +- .../kernel_tests/padding_fifo_queue_test.py | 124 ++++----- .../parse_single_example_op_test.py | 4 +- .../python/kernel_tests/parsing_ops_test.py | 18 +- .../partitioned_variables_test.py | 40 +-- .../kernel_tests/priority_queue_test.py | 20 +- 
.../python/kernel_tests/reader_ops_test.py | 36 +-- .../python/kernel_tests/record_input_test.py | 14 +- .../kernel_tests/reduce_join_op_test.py | 16 +- .../python/kernel_tests/reduction_ops_test.py | 30 +- .../kernel_tests/regex_full_match_op_test.py | 6 +- .../python/kernel_tests/relu_op_test.py | 36 +-- .../python/kernel_tests/reshape_op_test.py | 2 +- .../kernel_tests/reverse_sequence_op_test.py | 4 +- .../kernel_tests/scatter_nd_ops_test.py | 32 +-- .../segment_reduction_ops_test.py | 12 +- .../python/kernel_tests/session_ops_test.py | 32 +-- tensorflow/python/kernel_tests/sets_test.py | 10 +- .../python/kernel_tests/shape_ops_test.py | 34 +-- .../python/kernel_tests/slice_op_test.py | 4 +- .../python/kernel_tests/softmax_op_test.py | 4 +- .../python/kernel_tests/softplus_op_test.py | 8 +- .../python/kernel_tests/softsign_op_test.py | 4 +- .../kernel_tests/spacetobatch_op_test.py | 4 +- .../sparse_conditional_accumulator_test.py | 40 +-- .../kernel_tests/sparse_cross_op_test.py | 34 +-- .../kernel_tests/sparse_matmul_op_test.py | 2 +- .../python/kernel_tests/sparse_ops_test.py | 2 +- .../sparse_to_dense_op_py_test.py | 16 +- .../python/kernel_tests/sparsemask_op_test.py | 2 +- .../kernel_tests/string_join_op_test.py | 2 +- .../kernel_tests/string_length_op_test.py | 2 +- .../kernel_tests/string_split_op_test.py | 30 +- .../kernel_tests/string_strip_op_test.py | 6 +- .../string_to_hash_bucket_op_test.py | 14 +- .../kernel_tests/string_to_number_op_test.py | 2 +- .../python/kernel_tests/substr_op_test.py | 28 +- .../python/kernel_tests/summary_ops_test.py | 6 +- .../kernel_tests/summary_tensor_op_test.py | 14 +- .../python/kernel_tests/tensordot_op_test.py | 6 +- .../python/kernel_tests/transpose_op_test.py | 4 +- .../python/kernel_tests/unique_op_test.py | 20 +- .../python/kernel_tests/unstack_op_test.py | 8 +- .../python/kernel_tests/variable_ops_test.py | 4 +- .../kernel_tests/variable_scope_test.py | 60 ++-- .../python/kernel_tests/variables_test.py | 58 
++-- .../kernel_tests/weights_broadcast_test.py | 8 +- .../python/kernel_tests/xent_op_test.py | 10 +- 120 files changed, 1292 insertions(+), 1292 deletions(-) diff --git a/tensorflow/python/kernel_tests/accumulate_n_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py index b793906fac..0bc5268f38 100644 --- a/tensorflow/python/kernel_tests/accumulate_n_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_test.py @@ -76,7 +76,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): # Putting them here so that everything that exercises AccumulateNV2 is in # one place and the default build runs all unit tests. def testSimple(self): - with self.test_session(): + with self.cached_session(): random_arrays = [ np.random.rand(16, 16, 16, 16).astype(np.float32) for _ in range(20) ] @@ -91,27 +91,27 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): self.assertAllClose(np_val, tf_val.eval()) def testZeroArgs(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): tf_val = math_ops.accumulate_n([]) tf_val.eval() def testWrongShape(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): a = variables.Variable(0.2) b = variables.Variable(0.1) math_ops.accumulate_n([a, b], shape=[2, 2]) # Should be shape=[] def testIncompatibleShapes(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): a = variables.Variable(np.array([0.1, 0.2])) b = variables.Variable(np.array([[0.3], [0.4]])) math_ops.accumulate_n([a, b]) def testWrongType(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) @@ -119,7 +119,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked - with self.test_session(): + 
with self.cached_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) math_ops.accumulate_n([a], tensor_dtype=np.int32) diff --git a/tensorflow/python/kernel_tests/ackermann_test.py b/tensorflow/python/kernel_tests/ackermann_test.py index 5e0d87c783..d267e49752 100644 --- a/tensorflow/python/kernel_tests/ackermann_test.py +++ b/tensorflow/python/kernel_tests/ackermann_test.py @@ -34,7 +34,7 @@ class AckermannTest(test.TestCase): self.assertEqual(len(ackermann.OP_LIST.op), 1) self.assertEqual(ackermann.OP_LIST.op[0].name, 'Ackermann') - with self.test_session(): + with self.cached_session(): self.assertEqual(ackermann.ackermann().eval(), b'A(m, 0) == A(m-1, 1)') diff --git a/tensorflow/python/kernel_tests/argmax_op_test.py b/tensorflow/python/kernel_tests/argmax_op_test.py index 1202c463e8..127d14c250 100644 --- a/tensorflow/python/kernel_tests/argmax_op_test.py +++ b/tensorflow/python/kernel_tests/argmax_op_test.py @@ -104,20 +104,20 @@ class ArgMaxTest(test.TestCase): self._testDim(np.int64) def testEmpty(self): - with self.test_session(): + with self.cached_session(): for op in math_ops.argmin, math_ops.argmax: with self.assertRaisesOpError( r"Reduction axis 0 is empty in shape \[0\]"): op([], 0).eval() def testDefaultAxis(self): - with self.test_session(): + with self.cached_session(): for op in math_ops.argmin, math_ops.argmax: ans = op([1]).eval() self.assertAllEqual(ans, 0) def testOutputEmpty(self): - with self.test_session(): + with self.cached_session(): for op in math_ops.argmin, math_ops.argmax: ret = op(array_ops.zeros(shape=[1, 0, 2]), axis=-1).eval() self.assertEqual(ret.shape, (1, 0)) diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index a164682227..573bb8614f 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -50,7 +50,7 @@ class 
BatchMatrixTransposeTest(test_util.TensorFlowTestCase): def testNonBatchMatrix(self): matrix = [[1, 2, 3], [4, 5, 6]] # Shape (2, 3) expected_transposed = [[1, 4], [2, 5], [3, 6]] # Shape (3, 2) - with self.test_session(): + with self.cached_session(): transposed = array_ops.matrix_transpose(matrix) self.assertEqual((3, 2), transposed.get_shape()) self.assertAllEqual(expected_transposed, transposed.eval()) @@ -58,7 +58,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase): def testConjugate(self): m = [[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, 6 + 6j]] expected_transposed = [[1 - 1j, 4 - 4j], [2 - 2j, 5 - 5j], [3 - 3j, 6 - 6j]] - with self.test_session(): + with self.cached_session(): matrix = ops.convert_to_tensor(m) transposed = array_ops.matrix_transpose(matrix, conjugate=True) self.assertEqual((3, 2), transposed.get_shape()) @@ -71,7 +71,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase): matrix_1_t = [[11, 44], [22, 55], [33, 66]] batch_matrix = [matrix_0, matrix_1] # Shape (2, 2, 3) expected_transposed = [matrix_0_t, matrix_1_t] # Shape (2, 3, 2) - with self.test_session(): + with self.cached_session(): transposed = array_ops.matrix_transpose(batch_matrix) self.assertEqual((2, 3, 2), transposed.get_shape()) self.assertAllEqual(expected_transposed, transposed.eval()) @@ -79,7 +79,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase): def testNonBatchMatrixDynamicallyDefined(self): matrix = [[1, 2, 3], [4, 5, 6]] # Shape (2, 3) expected_transposed = [[1, 4], [2, 5], [3, 6]] # Shape (3, 2) - with self.test_session(): + with self.cached_session(): matrix_ph = array_ops.placeholder(dtypes.int32) transposed = array_ops.matrix_transpose(matrix_ph) self.assertAllEqual( @@ -94,7 +94,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase): matrix_1_t = [[11, 44], [22, 55], [33, 66]] batch_matrix = [matrix_0, matrix_1] # Shape (2, 2, 3) expected_transposed = [matrix_0_t, matrix_1_t] # Shape (2, 3, 2) - with 
self.test_session(): + with self.cached_session(): batch_matrix_ph = array_ops.placeholder(dtypes.int32) transposed = array_ops.matrix_transpose(batch_matrix_ph) self.assertAllEqual( @@ -105,7 +105,7 @@ class BatchMatrixTransposeTest(test_util.TensorFlowTestCase): def testTensorWithStaticRankLessThanTwoRaisesBecauseNotAMatrix(self): vector = [1, 2, 3] - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "should be a "): array_ops.matrix_transpose(vector) @@ -129,7 +129,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): masked_arr = arr[:, mask] elif axis == 2: masked_arr = arr[:, :, mask] - with self.test_session(): + with self.cached_session(): masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime @@ -176,7 +176,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): numpy_result = arr[mask] tf_result = array_ops.boolean_mask(arr, mask) self.assertAllEqual(numpy_result.shape[1:], tf_result.get_shape()[1:]) - with self.test_session(): + with self.cached_session(): self.assertAllClose(numpy_result, tf_result.eval()) def testEmptyInput1D(self): @@ -185,7 +185,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): numpy_result = arr[mask] tf_result = array_ops.boolean_mask(arr, mask) self.assertAllEqual(numpy_result.shape[1:], tf_result.get_shape()[1:]) - with self.test_session(): + with self.cached_session(): self.assertAllClose(numpy_result, tf_result.eval()) def testEmptyOutput(self): @@ -199,7 +199,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self): # The rank of the mask tensor must be specified. This is explained # in the docstring as well. 
- with self.test_session() as sess: + with self.cached_session() as sess: ph_tensor = array_ops.placeholder(dtypes.int32, shape=None) ph_mask = array_ops.placeholder(dtypes.bool, shape=[None]) @@ -217,7 +217,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def testMaskDimensionsSetToNoneRaises(self): # The rank of the mask tensor must be specified. This is explained # in the docstring as well. - with self.test_session(): + with self.cached_session(): tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2]) mask = array_ops.placeholder(dtypes.bool, shape=None) with self.assertRaisesRegexp(ValueError, "dimensions must be specified"): @@ -226,21 +226,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): def testMaskHasMoreDimsThanTensorRaises(self): mask = [[True, True], [False, False]] tensor = [1, 2, 3, 4] - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "incompatible"): array_ops.boolean_mask(tensor, mask).eval() def testMaskIsScalarRaises(self): mask = True tensor = 1 - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "mask.*scalar"): array_ops.boolean_mask(tensor, mask).eval() def testMaskShapeDifferentThanFirstPartOfTensorShapeRaises(self): mask = [True, True, True] tensor = [[1, 2], [3, 4]] - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "incompatible"): array_ops.boolean_mask(tensor, mask).eval() @@ -345,7 +345,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase): def testInvalid(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) axis = array_ops.placeholder(dtypes.int32) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of valid range"): array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]}) @@ -954,7 +954,7 @@ class StridedSliceAssignChecker(object): class 
SliceAssignTest(test_util.TensorFlowTestCase): def testInvalidSlice(self): - with self.test_session() as sess: + with self.cached_session() as sess: foo = constant_op.constant([1, 2, 3]) with self.assertRaisesRegexp(ValueError, "Sliced assignment" " is only supported for variables"): @@ -1000,7 +1000,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp( errors.FailedPreconditionError, "Attempting to use uninitialized value Variable"): - with self.test_session() as sess: + with self.cached_session() as sess: v = variables.Variable([1, 2]) sess.run(v[:].assign([1, 2])) @@ -1019,7 +1019,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase): too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8) too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64) v = resource_variable_ops.ResourceVariable(init_val) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(v.initializer) with self.assertRaises(ValueError): sess.run(v[:].assign(too_large_val)) @@ -1066,12 +1066,12 @@ class ShapeSizeRankTest(test_util.TensorFlowTestCase): class SequenceMaskTest(test_util.TensorFlowTestCase): def testExceptions(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "maxlen must be scalar"): array_ops.sequence_mask([10, 20], [10, 20]) def testOneDimensionalWithMaxlen(self): - with self.test_session(): + with self.cached_session(): res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5) self.assertAllEqual(res.get_shape(), [3, 5]) self.assertAllEqual( @@ -1081,7 +1081,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): @test_util.enable_c_shapes def testOneDimensionalDtypeWithoutMaxlen(self): - with self.test_session(): + with self.cached_session(): # test dtype and default maxlen: res = array_ops.sequence_mask(constant_op.constant([0, 1, 4]), dtype=dtypes.float32) @@ -1092,7 +1092,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): 
@test_util.enable_c_shapes def testOneDimensionalWithoutMaxlen(self): - with self.test_session(): + with self.cached_session(): res = array_ops.sequence_mask( constant_op.constant([0, 1, 4])) self.assertAllEqual(res.get_shape().as_list(), [3, 4]) @@ -1104,7 +1104,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): @test_util.enable_c_shapes def testTwoDimensional(self): - with self.test_session(): + with self.cached_session(): res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5) self.assertAllEqual(res.get_shape(), [1, 3, 5]) self.assertAllEqual(res.eval(), [[[True, False, False, False, False], [ @@ -1137,7 +1137,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): [[True, False, False, False, False], [True, True, True, False, False], [True, True, False, False, False]]) - with self.test_session(): + with self.cached_session(): check_dtypes(dtypes.int32, dtypes.int32) check_dtypes(dtypes.int32, dtypes.int64) check_dtypes(dtypes.int64, dtypes.int32) @@ -1216,7 +1216,7 @@ class UnravelIndexTest(test_util.TensorFlowTestCase): # TODO(b/73086570): Reenable test. 
@unittest.skip("Test does not pass internally.") def testUnravelIndex(self): - with self.test_session(): + with self.cached_session(): for dtype in [dtypes.int32, dtypes.int64]: indices_1 = constant_op.constant(1621, dtype=dtype) dims_1 = constant_op.constant([6, 7, 8, 9], dtype=dtype) @@ -1237,13 +1237,13 @@ class UnravelIndexTest(test_util.TensorFlowTestCase): class GuaranteeConstOpTest(test_util.TensorFlowTestCase): def testSimple(self): - with self.test_session(): + with self.cached_session(): a = array_ops.constant(10) guarantee_a = array_ops.guarantee_const(a) self.assertEqual(10, guarantee_a.eval()) def testVariables(self): - with self.test_session() as sess: + with self.cached_session() as sess: for use_resource in [False, True]: a = variable_scope.get_variable( "var_{}".format(use_resource), [], @@ -1254,7 +1254,7 @@ class GuaranteeConstOpTest(test_util.TensorFlowTestCase): self.assertEqual(10.0, guarantee_a.eval()) def testResourceRejection(self): - with self.test_session() as sess: + with self.cached_session() as sess: a = variable_scope.get_variable( "resource_var", [], initializer=init_ops.constant_initializer(10.0), diff --git a/tensorflow/python/kernel_tests/as_string_op_test.py b/tensorflow/python/kernel_tests/as_string_op_test.py index 51aa17babe..dd4a90e5f6 100644 --- a/tensorflow/python/kernel_tests/as_string_op_test.py +++ b/tensorflow/python/kernel_tests/as_string_op_test.py @@ -32,7 +32,7 @@ class AsStringOpTest(test.TestCase): 0, 1, -1, 0.5, 0.25, 0.125, float("INF"), float("NAN"), float("-INF") ] - with self.test_session(): + with self.cached_session(): for dtype in (dtypes.float32, dtypes.float64): input_ = array_ops.placeholder(dtype) @@ -84,7 +84,7 @@ class AsStringOpTest(test.TestCase): int_inputs_ = [0, -1, 1, -128, 127, -101, 101, -0] s = lambda strs: [x.decode("ascii") for x in strs] - with self.test_session(): + with self.cached_session(): for dtype in (dtypes.int32, dtypes.int64, dtypes.int8): input_ = array_ops.placeholder(dtype) 
@@ -117,7 +117,7 @@ class AsStringOpTest(test.TestCase): # testing int8 s = lambda strs: [x.decode("ascii") for x in strs] - with self.test_session(): + with self.cached_session(): input_ = array_ops.placeholder(dtypes.int32) int_inputs_ = [np.iinfo(np.int32).min, np.iinfo(np.int32).max] output = string_ops.as_string(input_) @@ -133,7 +133,7 @@ class AsStringOpTest(test.TestCase): def testHalfInt(self): s = lambda strs: [x.decode("ascii") for x in strs] - with self.test_session(): + with self.cached_session(): input_ = array_ops.placeholder(dtypes.int16) int_inputs_ = [np.iinfo(np.int16).min, np.iinfo(np.int16).max] output = string_ops.as_string(input_) @@ -144,7 +144,7 @@ class AsStringOpTest(test.TestCase): bool_inputs_ = [False, True] s = lambda strs: [x.decode("ascii") for x in strs] - with self.test_session(): + with self.cached_session(): for dtype in (dtypes.bool,): input_ = array_ops.placeholder(dtype) @@ -159,7 +159,7 @@ class AsStringOpTest(test.TestCase): ] complex_inputs_ = [(x + (x + 1) * 1j) for x in float_inputs_] - with self.test_session(): + with self.cached_session(): for dtype in (dtypes.complex64, dtypes.complex128): input_ = array_ops.placeholder(dtype) diff --git a/tensorflow/python/kernel_tests/atrous_convolution_test.py b/tensorflow/python/kernel_tests/atrous_convolution_test.py index b98e5fd386..6b16fca29d 100644 --- a/tensorflow/python/kernel_tests/atrous_convolution_test.py +++ b/tensorflow/python/kernel_tests/atrous_convolution_test.py @@ -263,7 +263,7 @@ class AtrousConvolutionTest(test.TestCase): self.assertLess(err, err_tolerance) def testGradient(self): - with self.test_session(): + with self.cached_session(): for padding in ["SAME", "VALID"]: for rate_width in range(1, 3): for rate_height in range(1, 3): diff --git a/tensorflow/python/kernel_tests/attention_ops_test.py b/tensorflow/python/kernel_tests/attention_ops_test.py index fb74698660..1e09ba5b65 100644 --- a/tensorflow/python/kernel_tests/attention_ops_test.py +++ 
b/tensorflow/python/kernel_tests/attention_ops_test.py @@ -84,7 +84,7 @@ class ExtractGlimpseTest(test.TestCase): image_ops.extract_glimpse(t_cols_4d, t1, t2), [0, 2, 1, 3])) # Evaluate the TensorFlow Graph. - with self.test_session() as sess: + with self.cached_session() as sess: value_rows, value_cols = sess.run([glimpse_rows, glimpse_cols]) # Check dimensions of returned glimpse. @@ -118,7 +118,7 @@ class ExtractGlimpseTest(test.TestCase): def testEmptyTensor(self): empty_image = np.zeros((0, 4, 3, 0)) offsets = np.zeros((0, 2)) - with self.test_session(): + with self.cached_session(): result = image_ops.extract_glimpse(empty_image, [1, 1], offsets) self.assertAllEqual( np.zeros( diff --git a/tensorflow/python/kernel_tests/barrier_ops_test.py b/tensorflow/python/kernel_tests/barrier_ops_test.py index 7f49c63957..4d36b3a465 100644 --- a/tensorflow/python/kernel_tests/barrier_ops_test.py +++ b/tensorflow/python/kernel_tests/barrier_ops_test.py @@ -67,7 +67,7 @@ class BarrierTest(test.TestCase): """, b.barrier_ref.op.node_def) def testInsertMany(self): - with self.test_session(): + with self.cached_session(): b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((), ()), name="B") size_t = b.ready_size() @@ -83,7 +83,7 @@ class BarrierTest(test.TestCase): self.assertEquals(size_t.eval(), [3]) def testInsertManyEmptyTensor(self): - with self.test_session(): + with self.cached_session(): error_message = ("Empty tensors are not supported, but received shape " r"\'\(0,\)\' at index 1") with self.assertRaisesRegexp(ValueError, error_message): @@ -91,7 +91,7 @@ class BarrierTest(test.TestCase): (dtypes.float32, dtypes.float32), shapes=((1,), (0,)), name="B") def testInsertManyEmptyTensorUnknown(self): - with self.test_session(): + with self.cached_session(): b = data_flow_ops.Barrier((dtypes.float32, dtypes.float32), name="B") size_t = b.ready_size() self.assertEqual([], size_t.get_shape()) @@ -103,7 +103,7 @@ class BarrierTest(test.TestCase): 
insert_0_op.run() def testTakeMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((), ()), name="B") size_t = b.ready_size() @@ -128,7 +128,7 @@ class BarrierTest(test.TestCase): self.assertEqual(values_1_val[idx], v1) def testTakeManySmallBatch(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((), ()), name="B") size_t = b.ready_size() @@ -192,7 +192,7 @@ class BarrierTest(test.TestCase): insert_1_3_op.run() def testUseBarrierWithShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((2, 2), (8,)), name="B") size_t = b.ready_size() @@ -221,7 +221,7 @@ class BarrierTest(test.TestCase): self.assertAllEqual(values_1_val[idx], v1) def testParallelInsertMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier(dtypes.float32, shapes=()) size_t = b.ready_size() keys = [str(x).encode("ascii") for x in range(10)] @@ -241,7 +241,7 @@ class BarrierTest(test.TestCase): self.assertEqual(values_val[idx], v) def testParallelTakeMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier(dtypes.float32, shapes=()) size_t = b.ready_size() keys = [str(x).encode("ascii") for x in range(10)] @@ -275,7 +275,7 @@ class BarrierTest(test.TestCase): zip(keys, values), [(k[0], v[0]) for k, v in zip(key_vals, value_vals)]) def testBlockingTakeMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier(dtypes.float32, shapes=()) keys = [str(x).encode("ascii") for x in range(10)] values = [float(x) for x in range(10)] @@ -297,7 +297,7 @@ class BarrierTest(test.TestCase): t.join() def testParallelInsertManyTakeMany(self): - with 
self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.int64), shapes=((), (2,))) num_iterations = 100 @@ -376,7 +376,7 @@ class BarrierTest(test.TestCase): self.assertAllEqual(taken_i["values_1"], expected_values_1) def testClose(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((), ()), name="B") size_t = b.ready_size() @@ -434,7 +434,7 @@ class BarrierTest(test.TestCase): sess.run(take_t[0]) def testCancel(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((), ()), name="B") size_t = b.ready_size() @@ -487,7 +487,7 @@ class BarrierTest(test.TestCase): sess.run(take_t[0]) def _testClosedEmptyBarrierTakeManyAllowSmallBatchRaises(self, cancel): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.float32), shapes=((), ()), name="B") take_t = b.take_many(1, allow_small_batch=True) @@ -500,7 +500,7 @@ class BarrierTest(test.TestCase): self._testClosedEmptyBarrierTakeManyAllowSmallBatchRaises(cancel=True) def _testParallelInsertManyTakeManyCloseHalfwayThrough(self, cancel): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.int64), shapes=((), (2,))) num_iterations = 50 @@ -576,7 +576,7 @@ class BarrierTest(test.TestCase): self._testParallelInsertManyTakeManyCloseHalfwayThrough(cancel=True) def _testParallelPartialInsertManyTakeManyCloseHalfwayThrough(self, cancel): - with self.test_session() as sess: + with self.cached_session() as sess: b = data_flow_ops.Barrier( (dtypes.float32, dtypes.int64), shapes=((), (2,))) num_iterations = 100 @@ -676,7 +676,7 @@ class BarrierTest(test.TestCase): self._testParallelPartialInsertManyTakeManyCloseHalfwayThrough(cancel=True) def 
testIncompatibleSharedBarrierErrors(self): - with self.test_session(): + with self.cached_session(): # Do component types and shapes. b_a_1 = data_flow_ops.Barrier( (dtypes.float32,), shapes=(()), shared_name="b_a") diff --git a/tensorflow/python/kernel_tests/base64_ops_test.py b/tensorflow/python/kernel_tests/base64_ops_test.py index be96f45497..1b399942ef 100644 --- a/tensorflow/python/kernel_tests/base64_ops_test.py +++ b/tensorflow/python/kernel_tests/base64_ops_test.py @@ -48,7 +48,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase): return base64_msg def _RunTest(self, msg, pad): - with self.test_session() as sess: + with self.cached_session() as sess: if pad: encoded, decoded = sess.run([self._encoded_t, self._decoded_t], feed_dict={self._msg: msg}) @@ -92,7 +92,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase): encoded = string_ops.encode_base64(msg, pad=pad) decoded = string_ops.decode_base64(encoded) - with self.test_session() as sess: + with self.cached_session() as sess: encoded_value, decoded_value = sess.run([encoded, decoded]) self.assertEqual(encoded_value.shape, msg.shape) @@ -102,7 +102,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase): def try_decode(enc): self._decoded_f.eval(feed_dict={self._encoded_f: enc}) - with self.test_session(): + with self.cached_session(): # Invalid length. 
msg = np.random.bytes(99) enc = base64.urlsafe_b64encode(msg) diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py index 987a6ffcd4..e651fa0070 100644 --- a/tensorflow/python/kernel_tests/basic_gpu_test.py +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -174,7 +174,7 @@ class BroadcastSimpleTest(test.TestCase): numeric_gradient_type=None): z = np_func(x, y) zs = list(z.shape) - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) if x.dtype in (np.float32, np.float64): @@ -195,7 +195,7 @@ class BroadcastSimpleTest(test.TestCase): numeric_gradient_type=None): z = np_func(x, y) zs = list(z.shape) - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) if x.dtype in (np.float32, np.float64): diff --git a/tensorflow/python/kernel_tests/batch_gather_op_test.py b/tensorflow/python/kernel_tests/batch_gather_op_test.py index 8e7ae89f9d..7dd347989a 100644 --- a/tensorflow/python/kernel_tests/batch_gather_op_test.py +++ b/tensorflow/python/kernel_tests/batch_gather_op_test.py @@ -86,7 +86,7 @@ class GatherTest(test.TestCase): def testString(self): params = np.array([[b"asdf", b"zxcv"], [b"qwer", b"uiop"]]) - with self.test_session(): + with self.cached_session(): indices_tf = constant_op.constant([1]) self.assertAllEqual([[b"qwer", b"uiop"]], array_ops.batch_gather(params, indices_tf).eval()) diff --git a/tensorflow/python/kernel_tests/batchtospace_op_test.py b/tensorflow/python/kernel_tests/batchtospace_op_test.py index 6143cd3baa..03f3f64353 100644 --- a/tensorflow/python/kernel_tests/batchtospace_op_test.py +++ b/tensorflow/python/kernel_tests/batchtospace_op_test.py @@ -60,7 +60,7 @@ class BatchToSpaceDepthToSpace(test.TestCase, PythonOpImpl): array_ops.depth_to_space( array_ops.transpose(x, [3, 1, 2, 0]), block_size=block_size), [3, 1, 2, 0]) - with self.test_session(): + 
with self.cached_session(): self.assertAllEqual(y1.eval(), y2.eval()) @@ -235,7 +235,7 @@ class BatchToSpaceGradientTest(test.TestCase, PythonOpImpl): # Check the gradients. def _checkGrad(self, x, crops, block_size): assert 4 == x.ndim - with self.test_session(): + with self.cached_session(): tf_x = ops.convert_to_tensor(x) tf_y = self.batch_to_space(tf_x, crops, block_size) epsilon = 1e-5 @@ -293,7 +293,7 @@ class BatchToSpaceNDGradientTest(test.TestCase): block_shape = np.array(block_shape) crops = constant_op.constant( np.array(crops).reshape((len(block_shape), 2)), crops_dtype) - with self.test_session(): + with self.cached_session(): tf_x = ops.convert_to_tensor(x) tf_y = array_ops.batch_to_space_nd(tf_x, block_shape, crops) epsilon = 1e-5 diff --git a/tensorflow/python/kernel_tests/bcast_ops_test.py b/tensorflow/python/kernel_tests/bcast_ops_test.py index 3305e55c05..3ec820aead 100644 --- a/tensorflow/python/kernel_tests/bcast_ops_test.py +++ b/tensorflow/python/kernel_tests/bcast_ops_test.py @@ -28,11 +28,11 @@ from tensorflow.python.platform import test class BcastOpsTest(test.TestCase): def _GetBroadcastShape(self, xs, ys): - with self.test_session() as sess: + with self.cached_session() as sess: return sess.run(broadcast_args(xs, ys)) def _GetGradientArgs(self, xs, ys): - with self.test_session() as sess: + with self.cached_session() as sess: return sess.run(broadcast_gradient_args(xs, ys)) def testBasic(self): diff --git a/tensorflow/python/kernel_tests/betainc_op_test.py b/tensorflow/python/kernel_tests/betainc_op_test.py index 16fdedac41..92d21462d5 100644 --- a/tensorflow/python/kernel_tests/betainc_op_test.py +++ b/tensorflow/python/kernel_tests/betainc_op_test.py @@ -47,7 +47,7 @@ class BetaincTest(test.TestCase): tf_b_s = constant_op.constant(b_s, dtype=dtype) tf_x_s = constant_op.constant(x_s, dtype=dtype) tf_out_t = math_ops.betainc(tf_a_s, tf_b_s, tf_x_s) - with self.test_session(): + with self.cached_session(): tf_out = tf_out_t.eval() 
scipy_out = special.betainc(a_s, b_s, x_s).astype(np_dt) @@ -60,13 +60,13 @@ class BetaincTest(test.TestCase): # Test out-of-range values (most should return nan output) combinations = list(itertools.product([-1, 0, 0.5, 1.0, 1.5], repeat=3)) a_comb, b_comb, x_comb = np.asarray(list(zip(*combinations)), dtype=np_dt) - with self.test_session(): + with self.cached_session(): tf_comb = math_ops.betainc(a_comb, b_comb, x_comb).eval() scipy_comb = special.betainc(a_comb, b_comb, x_comb).astype(np_dt) self.assertAllCloseAccordingToType(scipy_comb, tf_comb) # Test broadcasting between scalars and other shapes - with self.test_session(): + with self.cached_session(): self.assertAllCloseAccordingToType( special.betainc(0.1, b_s, x_s).astype(np_dt), math_ops.betainc(0.1, b_s, x_s).eval(), @@ -96,7 +96,7 @@ class BetaincTest(test.TestCase): with self.assertRaisesRegexp(ValueError, "must be equal"): math_ops.betainc(0.5, [0.5], [[0.5]]) - with self.test_session(): + with self.cached_session(): with self.assertRaisesOpError("Shapes of .* are inconsistent"): a_p = array_ops.placeholder(dtype) b_p = array_ops.placeholder(dtype) @@ -140,7 +140,7 @@ class BetaincTest(test.TestCase): self._testBetaInc(a_s, b_s, x_s, dtypes.float32) def testBetaIncFpropAndBpropAreNeverNAN(self): - with self.test_session() as sess: + with self.cached_session() as sess: space = np.logspace(-8, 5).tolist() space_x = np.linspace(1e-16, 1 - 1e-16).tolist() ga_s, gb_s, gx_s = zip(*list(itertools.product(space, space, space_x))) @@ -161,7 +161,7 @@ class BetaincTest(test.TestCase): def testBetaIncGrads(self): err_tolerance = 1e-3 - with self.test_session(): + with self.cached_session(): # Test gradient ga_s = np.abs(np.random.randn(2, 2) * 30) # in (0, infty) gb_s = np.abs(np.random.randn(2, 2) * 30) # in (0, infty) diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 2767df127e..8a58b3f97e 100644 --- 
a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -93,7 +93,7 @@ class BincountTest(test_util.TensorFlowTestCase): def test_negative(self): # unsorted_segment_sum will only report InvalidArgumentError on CPU - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() diff --git a/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py b/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py index 28b3dc45e9..b19077db56 100644 --- a/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py +++ b/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py @@ -38,7 +38,7 @@ class RangeSamplerOpsTest(test.TestCase): TRUE_LABELS = [[1, 2], [0, 4], [3, 3]] def testTrueCandidates(self): - with self.test_session() as sess: + with self.cached_session() as sess: indices = constant_op.constant([0, 0, 1, 1, 2, 2]) true_candidates_vec = constant_op.constant([1, 2, 0, 4, 3, 3]) true_candidates_matrix = array_ops.reshape( @@ -50,7 +50,7 @@ class RangeSamplerOpsTest(test.TestCase): self.assertAllEqual(true_candidates_val, self.TRUE_LABELS) def testSampledCandidates(self): - with self.test_session(): + with self.cached_session(): true_classes = constant_op.constant( [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64) sampled_candidates, _, _ = candidate_sampling_ops.all_candidate_sampler( @@ -62,7 +62,7 @@ class RangeSamplerOpsTest(test.TestCase): self.assertEqual(sampled_candidates.get_shape(), [self.NUM_SAMPLED]) def testTrueLogExpectedCount(self): - with self.test_session(): + with self.cached_session(): true_classes = constant_op.constant( [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64) _, true_expected_count, _ = candidate_sampling_ops.all_candidate_sampler( @@ -77,7 +77,7 @@ class RangeSamplerOpsTest(test.TestCase): [self.BATCH_SIZE, self.NUM_TRUE]) def testSampledLogExpectedCount(self): - with 
self.test_session(): + with self.cached_session(): true_classes = constant_op.constant( [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64) _, _, sampled_expected_count = candidate_sampling_ops.all_candidate_sampler( # pylint: disable=line-too-long @@ -90,7 +90,7 @@ class RangeSamplerOpsTest(test.TestCase): self.assertEqual(sampled_log_expected_count.get_shape(), [self.NUM_SAMPLED]) def testAccidentalHits(self): - with self.test_session() as sess: + with self.cached_session() as sess: true_classes = constant_op.constant( [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64) sampled_candidates, _, _ = candidate_sampling_ops.all_candidate_sampler( @@ -109,7 +109,7 @@ class RangeSamplerOpsTest(test.TestCase): def testSeed(self): def draw(seed): - with self.test_session(): + with self.cached_session(): true_classes = constant_op.constant( [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64) sampled, _, _ = candidate_sampling_ops.log_uniform_candidate_sampler( diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py index 214d5cb3c0..c90520e46d 100644 --- a/tensorflow/python/kernel_tests/cast_op_test.py +++ b/tensorflow/python/kernel_tests/cast_op_test.py @@ -174,7 +174,7 @@ class CastOpTest(test.TestCase): self.assertAllEqual(np.isnan(self._cast(np.nan, np.float64, True)), True) def _OpError(self, x, dtype, err): - with self.test_session(): + with self.cached_session(): with self.assertRaisesOpError(err): math_ops.cast(x, dtype).eval() @@ -182,7 +182,7 @@ class CastOpTest(test.TestCase): self._OpError(np.arange(0, 10), dtypes.string, "Cast.*int64.*string.*") def testCastToTypeOfVariable(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = variables.Variable(5, dtype=dtypes.float32) y = variables.Variable(True, dtype=dtypes.bool) cast = math_ops.cast(y, x.dtype) @@ -193,7 +193,7 @@ class CastOpTest(test.TestCase): t = [dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128] for src_t in t: for 
dst_t in t: - with self.test_session(): + with self.cached_session(): x = constant_op.constant(1.0, src_t) z = array_ops.identity(x) y = math_ops.cast(z, dst_t) @@ -209,7 +209,7 @@ class SparseTensorCastTest(test.TestCase): shape = constant_op.constant([3], dtypes.int64) st = sparse_tensor.SparseTensor(indices, values, shape) st_cast = math_ops.cast(st, dtypes.float32) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(st_cast.indices.eval(), [[0], [1], [2]]) self.assertAllEqual(st_cast.values.eval(), np.array([1, 2, 3], np.float32)) @@ -221,7 +221,7 @@ class SaturateCastTest(test.TestCase): def testSaturate(self): in_types = dtypes.float32, out_types = dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.float32 - with self.test_session() as sess: + with self.cached_session() as sess: for in_type in in_types: for out_type in out_types: lo, hi = in_type.min, in_type.max diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py index 7f147ba53a..51611b75af 100644 --- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py +++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py @@ -57,7 +57,7 @@ class GenerateVocabRemappingTest(test.TestCase): new_vocab_offset=0) expected_remapping = range(0, 3) expected_num_present = 3 - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_remapping, remapping.eval()) self.assertAllEqual(expected_num_present, num_present.eval()) @@ -70,7 +70,7 @@ class GenerateVocabRemappingTest(test.TestCase): new_vocab_offset=0) expected_remapping = [2, 0, 1] expected_num_present = 3 - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_remapping, remapping.eval()) self.assertAllEqual(expected_num_present, num_present.eval()) @@ -83,7 +83,7 @@ class GenerateVocabRemappingTest(test.TestCase): new_vocab_offset=1) expected_remapping = [0] expected_num_present = 1 - with self.test_session(): + with 
self.cached_session(): self.assertAllEqual(expected_remapping, remapping.eval()) self.assertAllEqual(expected_num_present, num_present.eval()) @@ -98,7 +98,7 @@ class GenerateVocabRemappingTest(test.TestCase): old_vocab_size=2) expected_remapping = [-1, 0, 1] expected_num_present = 2 - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_remapping, remapping.eval()) self.assertAllEqual(expected_num_present, num_present.eval()) @@ -122,7 +122,7 @@ class LoadAndRemapMatrixTest(test.TestCase): self.old_tensor_name = 'some_scope/matrix' save = saver.Saver([matrix]) - with self.test_session() as sess: + with self.cached_session() as sess: variables.global_variables_initializer().run() self.bundle_file = os.path.join(test.get_temp_dir(), 'bundle_checkpoint') save.save(sess, self.bundle_file) @@ -140,7 +140,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[], num_rows=2, num_cols=self.old_num_cols) - with self.test_session(): + with self.cached_session(): self.assertAllClose(self.matrix_value[row_remapping], remapped_matrix.eval()) @@ -155,7 +155,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[], num_rows=len(row_remapping), num_cols=len(col_remapping)) - with self.test_session(): + with self.cached_session(): self.assertAllClose(self.matrix_value[row_remapping][:, col_remapping], remapped_matrix.eval()) @@ -170,7 +170,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[], num_rows=len(row_remapping), num_cols=len(col_remapping)) - with self.test_session(): + with self.cached_session(): self.assertAllClose(self.matrix_value[row_remapping][:, col_remapping], remapped_matrix.eval()) @@ -189,7 +189,7 @@ class LoadAndRemapMatrixTest(test.TestCase): expected_remapped_matrix = np.reshape( [33, init_val, init_val, init_val, 1, init_val], [3, 2]) - with self.test_session(): + with self.cached_session(): self.assertAllClose(expected_remapped_matrix, remapped_matrix.eval()) def 
test_load_and_remap_all_missing_rows(self): @@ -204,7 +204,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=initializing_values, num_rows=num_rows, num_cols=self.old_num_cols) - with self.test_session(): + with self.cached_session(): self.assertAllClose( np.reshape(initializing_values, (num_rows, self.old_num_cols)), remapped_matrix.eval()) @@ -222,7 +222,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=initializing_values, num_rows=num_rows, num_cols=num_cols) - with self.test_session(): + with self.cached_session(): self.assertAllClose( np.reshape(initializing_values, (num_rows, num_cols)), remapped_matrix.eval()) @@ -243,7 +243,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[], num_rows=len(invalid_remapping), num_cols=self.old_num_cols) - with self.test_session(), self.assertRaises(errors.UnimplementedError): + with self.cached_session(), self.assertRaises(errors.UnimplementedError): remapped_matrix.eval() # Invalid column remapping. 
@@ -255,7 +255,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[], num_rows=self.old_num_rows, num_cols=len(invalid_remapping)) - with self.test_session(), self.assertRaises(errors.UnimplementedError): + with self.cached_session(), self.assertRaises(errors.UnimplementedError): remapped_matrix.eval() def test_load_and_remap_incorrect_initializing_values(self): @@ -272,7 +272,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[], num_rows=3, num_cols=2) - with self.test_session(), self.assertRaises(errors.InvalidArgumentError): + with self.cached_session(), self.assertRaises(errors.InvalidArgumentError): remapped_matrix.eval() remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( @@ -284,7 +284,7 @@ class LoadAndRemapMatrixTest(test.TestCase): initializing_values=[0] * 5, num_rows=3, num_cols=2) - with self.test_session(), self.assertRaises(errors.InvalidArgumentError): + with self.cached_session(), self.assertRaises(errors.InvalidArgumentError): remapped_matrix.eval() @@ -306,7 +306,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): initializer=constant_op.constant(np_value, dtype=dtypes.float32), partitioner=partitioner) - with self.test_session() as sess: + with self.cached_session() as sess: ckpt_path = os.path.join(test.get_temp_dir(), 'temp_ckpt') save = saver.Saver([matrix]) variables.global_variables_initializer().run() diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index de52a70cc0..bb7b645da2 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -39,7 +39,7 @@ class ClipTest(test.TestCase): min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32) max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32) outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val) - with self.test_session(): + with self.cached_session(): error_1 = 
gradient_checker.compute_gradient_error(inputs, [4], outputs_1, [4]) self.assertLess(error_1, 1e-4) @@ -139,7 +139,7 @@ class ClipTest(test.TestCase): def testClipByValueNonFinite(self): # TODO(b/78016351): Enable test on GPU once the bug is fixed. - with self.test_session(): + with self.cached_session(): x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')]) np_ans = [float('NaN'), 4.0, -4.0] clip_value = 4.0 diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index c22934ce47..0e59ce6972 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -383,7 +383,7 @@ class ConcatOpTest(test.TestCase): np.random.random_sample(x_shape).astype(np.float64) for x_shape in x_shapes ] - with self.test_session(): + with self.cached_session(): xs = [constant_op.constant(x_val) for x_val in x_vals] output = array_ops.concat(xs, 0) err = gradient_checker.compute_gradient_error(xs, x_shapes, output, @@ -397,7 +397,7 @@ class ConcatOpTest(test.TestCase): np.random.random_sample(x_shape).astype(np.float64) for x_shape in x_shapes ] - with self.test_session(): + with self.cached_session(): xs = [constant_op.constant(x_val) for x_val in x_vals] output = array_ops.concat(xs, 1) err = gradient_checker.compute_gradient_error(xs, x_shapes, output, @@ -411,7 +411,7 @@ class ConcatOpTest(test.TestCase): np.random.random_sample(x_shape).astype(np.float64) for x_shape in x_shapes ] - with self.test_session(): + with self.cached_session(): xs = [constant_op.constant(x_val) for x_val in x_vals] x_concat = array_ops.concat(xs, 0) output = array_ops.gather(x_concat, [1, 2, 0, 5]) @@ -426,7 +426,7 @@ class ConcatOpTest(test.TestCase): np.random.random_sample(x_shape).astype(np.float64) for x_shape in x_shapes ] - with self.test_session(): + with self.cached_session(): xs = [constant_op.constant(x_val) for x_val in x_vals] x_concat = array_ops.concat(xs, 1) 
output = array_ops.gather(x_concat, [1, 2, 0, 5]) @@ -441,7 +441,7 @@ class ConcatOpTest(test.TestCase): np.random.random_sample(x_shape).astype(np.float64) for x_shape in x_shapes ] - with self.test_session(): + with self.cached_session(): xs = [constant_op.constant(x_val) for x_val in x_vals] x_concat = array_ops.concat(xs, 2) output = array_ops.gather(x_concat, [1, 2, 0, 5]) @@ -452,7 +452,7 @@ class ConcatOpTest(test.TestCase): def testIndexedSlicesConcatDim1Grad_UnknownInputDim(self): x_shapes = [[20, 7, 3], [20, 3, 3], [20, 1, 3]] output_shape = [4, 11, 3] - with self.test_session(): + with self.cached_session(): x_1 = array_ops.placeholder(dtypes.float64) x_2 = array_ops.placeholder(dtypes.float64) x_3 = array_ops.placeholder(dtypes.float64) @@ -473,13 +473,13 @@ class ConcatOpTest(test.TestCase): def testConcatTuple(self): c1 = np.random.rand(4, 4) c2 = np.random.rand(4, 4) - with self.test_session(): + with self.cached_session(): concat_list_t = array_ops.concat([c1, c2], 0) concat_tuple_t = array_ops.concat((c1, c2), 0) self.assertAllEqual(concat_list_t.eval(), concat_tuple_t.eval()) def testConcatNoScalars(self): - with self.test_session(): + with self.cached_session(): scalar = constant_op.constant(7) dim = array_ops.placeholder(dtypes.int32) with self.assertRaisesRegexp( @@ -554,7 +554,7 @@ class ConcatOpTest(test.TestCase): def _testGradientsForAxis( self, inp_tensors, axis, output_shape, feed_dict=None): - with self.test_session(): + with self.cached_session(): c = array_ops.concat(inp_tensors, axis) grad_inp = np.random.rand(*output_shape).astype("f") grad_tensor = constant_op.constant( @@ -566,7 +566,7 @@ class ConcatOpTest(test.TestCase): def _testIndexedSlicesGradientsForAxis( self, inp_tensors, axis, output_shape, gather_indexes, feed_dict=None): - with self.test_session(): + with self.cached_session(): c = array_ops.gather( array_ops.concat(inp_tensors, axis), gather_indexes) grad_inp = np.random.rand(*output_shape).astype("f") @@ -631,7 +631,7 
@@ class ConcatOffsetTest(test.TestCase): self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) def testNotVector(self): - with self.test_session() as sess: + with self.cached_session() as sess: cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([[2, 3, 5]], dtypes.int32) s1 = constant_op.constant([[2, 7, 5]], dtypes.int32) @@ -641,7 +641,7 @@ class ConcatOffsetTest(test.TestCase): sess.run(off) def testConcatDimOutOfRange(self): - with self.test_session() as sess: + with self.cached_session() as sess: cdim = constant_op.constant(4, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) @@ -651,7 +651,7 @@ class ConcatOffsetTest(test.TestCase): sess.run(off) def testDimMismatch(self): - with self.test_session() as sess: + with self.cached_session() as sess: cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5, 10], dtypes.int32) @@ -661,7 +661,7 @@ class ConcatOffsetTest(test.TestCase): sess.run(off) def testSizeMismatch(self): - with self.test_session() as sess: + with self.cached_session() as sess: cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 10], dtypes.int32) diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py index 1fac7f8270..18a1b230a0 100644 --- a/tensorflow/python/kernel_tests/cond_v2_test.py +++ b/tensorflow/python/kernel_tests/cond_v2_test.py @@ -107,7 +107,7 @@ class CondV2Test(test.TestCase): self._testCond(true_fn, false_fn, [y]) def testNoInputs(self): - with self.test_session() as sess: + with self.cached_session() as sess: pred = array_ops.placeholder(dtypes.bool, name="pred") def true_fn(): @@ -527,7 +527,7 @@ class CondV2Test(test.TestCase): }), [5., 0.]) def testSecondDerivative(self): - with self.test_session() as sess: + with 
self.cached_session() as sess: pred = array_ops.placeholder(dtypes.bool, name="pred") x = constant_op.constant(3.0, name="x") diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py index 86802664d1..262352a9af 100644 --- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py +++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py @@ -80,26 +80,26 @@ class ConditionalAccumulatorTest(test.TestCase): """, q.accumulator_ref.op.node_def) def testAccumulatorSizeEmpty(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator(dtypes_lib.float32, name="Q") self.assertEqual(q.num_accumulated().eval(), 0) def testAccumulatorSetGlobalStep(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) set_global_step_op = q.set_global_step(1) set_global_step_op.run() def testAccumulatorApplyGradFloat32(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) accum_op = q.apply_grad((10.0,)) accum_op.run() def testDtypes(self): - with self.test_session() as sess: + with self.cached_session() as sess: dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] for i in range(len(dtypes)): @@ -116,7 +116,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(sum(elems) / len(elems), result) def testAccumulatorMultipleAccumulators(self): - with self.test_session(): + with self.cached_session(): q_f32_0 = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) q_f32_1 = data_flow_ops.ConditionalAccumulator( @@ -135,7 +135,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(result, i + 10.0) def 
testAccumulatorApplyAndTakeGradWithShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=(3, 2)) elems = [[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], @@ -166,7 +166,7 @@ class ConditionalAccumulatorTest(test.TestCase): q.apply_grad([[1.0], [2.0], [3.0]]) def testAccumulatorDynamicShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=None) @@ -191,7 +191,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertTrue(is_all_equal) def testAccumulatorWrongDynamicShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=None) @@ -209,7 +209,7 @@ class ConditionalAccumulatorTest(test.TestCase): sess.run(accum_op, feed_dict={x: [[1.0], [2.0], [3.0]]}) def testAccumulatorSizeAfterApplyGrad(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) accum_op = q.apply_grad((10.0,)) @@ -220,7 +220,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(q.num_accumulated().eval(), 2) def testAccumulatorSizeAfterApplyGradAndTakeGrad(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) accum_op = q.apply_grad((10.0,)) @@ -248,7 +248,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(q.num_accumulated().eval(), 0) def testAccumulatorTakeGradMean(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) elems = [10.0, 20.0] @@ -307,7 +307,7 @@ class ConditionalAccumulatorTest(test.TestCase): 
reduction_type="Invalid") def testAccumulatorInvalidTakeGrad(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) elems = [10.0, 20.0] @@ -322,7 +322,7 @@ class ConditionalAccumulatorTest(test.TestCase): takeg_t.eval() def testAccumulatorRepeatedTakeGradMean(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) @@ -379,7 +379,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(elems_sum, val) def testAccumulatorIncrementGlobalStep(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) @@ -395,7 +395,7 @@ class ConditionalAccumulatorTest(test.TestCase): inc_global_step.eval() def testAccumulatorSetGlobalStepPreventsAccumulation(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) @@ -416,7 +416,7 @@ class ConditionalAccumulatorTest(test.TestCase): if x >= ls), val) def testParallelApplyGrad(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] @@ -441,7 +441,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(val, sum(elems) / len(elems)) def testParallelTakeGrad(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) elems = [e for e in range(10)] @@ -473,7 +473,7 @@ class ConditionalAccumulatorTest(test.TestCase): 
self.assertItemsEqual(elems, results) def testAccumulatorApplyAndBlockingTake(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) @@ -506,7 +506,7 @@ class ConditionalAccumulatorTest(test.TestCase): sess.run(takeg_op) def testAccumulatorCancel(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) takeg_t = q.take_grad(1) diff --git a/tensorflow/python/kernel_tests/confusion_matrix_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py index 93f5323c41..bc24345261 100644 --- a/tensorflow/python/kernel_tests/confusion_matrix_test.py +++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py @@ -37,7 +37,7 @@ class ConfusionMatrixTest(test.TestCase): @test_util.run_in_graph_and_eager_modes def testExample(self): """This is a test of the example provided in pydoc.""" - with self.test_session(): + with self.cached_session(): self.assertAllEqual([ [0, 0, 0, 0, 0], [0, 0, 1, 0, 0], @@ -49,7 +49,7 @@ class ConfusionMatrixTest(test.TestCase): def _testConfMatrix(self, labels, predictions, truth, weights=None, num_classes=None): - with self.test_session(): + with self.cached_session(): dtype = predictions.dtype ans = confusion_matrix.confusion_matrix( labels, predictions, dtype=dtype, weights=weights, @@ -78,7 +78,7 @@ class ConfusionMatrixTest(test.TestCase): self._testBasic(dtype=np.int64) def _testConfMatrixOnTensors(self, tf_dtype, np_dtype): - with self.test_session() as sess: + with self.cached_session() as sess: m_neg = array_ops.placeholder(dtype=dtypes.float32) m_pos = array_ops.placeholder(dtype=dtypes.float32) s = array_ops.placeholder(dtype=dtypes.float32) @@ -229,7 +229,7 @@ class ConfusionMatrixTest(test.TestCase): def testOutputIsInt32(self): labels = np.arange(2) predictions = 
np.arange(2) - with self.test_session(): + with self.cached_session(): cm = confusion_matrix.confusion_matrix( labels, predictions, dtype=dtypes.int32) tf_cm = cm.eval() @@ -238,7 +238,7 @@ class ConfusionMatrixTest(test.TestCase): def testOutputIsInt64(self): labels = np.arange(2) predictions = np.arange(2) - with self.test_session(): + with self.cached_session(): cm = confusion_matrix.confusion_matrix( labels, predictions, dtype=dtypes.int64) tf_cm = cm.eval() @@ -260,7 +260,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): confusion_matrix.remove_squeezable_dimensions( labels_placeholder, predictions_placeholder)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(label_values, static_labels.eval()) self.assertAllEqual(prediction_values, static_predictions.eval()) feed_dict = { @@ -285,7 +285,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): confusion_matrix.remove_squeezable_dimensions( labels_placeholder, predictions_placeholder)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(label_values, static_labels.eval()) self.assertAllEqual(prediction_values, static_predictions.eval()) feed_dict = { @@ -310,7 +310,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): confusion_matrix.remove_squeezable_dimensions( labels_placeholder, predictions_placeholder, expected_rank_diff=0)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(label_values, static_labels.eval()) self.assertAllEqual(prediction_values, static_predictions.eval()) feed_dict = { @@ -336,7 +336,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): labels_placeholder, predictions_placeholder)) expected_label_values = np.reshape(label_values, newshape=(2, 3)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_label_values, static_labels.eval()) self.assertAllEqual(prediction_values, static_predictions.eval()) feed_dict = { @@ -362,7 +362,7 @@ class 
RemoveSqueezableDimensionsTest(test.TestCase): labels_placeholder, predictions_placeholder, expected_rank_diff=1)) expected_label_values = np.reshape(label_values, newshape=(2, 3)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_label_values, static_labels.eval()) self.assertAllEqual(prediction_values, static_predictions.eval()) feed_dict = { @@ -388,7 +388,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): labels_placeholder, predictions_placeholder)) expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(label_values, static_labels.eval()) self.assertAllEqual(expected_prediction_values, static_predictions.eval()) feed_dict = { @@ -415,7 +415,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): labels_placeholder, predictions_placeholder, expected_rank_diff=-1)) expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3)) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(label_values, static_labels.eval()) self.assertAllEqual(expected_prediction_values, static_predictions.eval()) feed_dict = { @@ -441,7 +441,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): confusion_matrix.remove_squeezable_dimensions( labels_placeholder, predictions_placeholder)) - with self.test_session(): + with self.cached_session(): feed_dict = { labels_placeholder: label_values, predictions_placeholder: prediction_values @@ -466,7 +466,7 @@ class RemoveSqueezableDimensionsTest(test.TestCase): confusion_matrix.remove_squeezable_dimensions( labels_placeholder, predictions_placeholder)) - with self.test_session(): + with self.cached_session(): feed_dict = { labels_placeholder: label_values, predictions_placeholder: prediction_values diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 107ee37fab..d1e4e5477f 100644 --- 
a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -162,18 +162,18 @@ class ConstantTest(test.TestCase): logging_const_op.run() def testStringWithNulls(self): - with self.test_session(): + with self.cached_session(): val = ops.convert_to_tensor(b"\0\0\0\0").eval() self.assertEqual(len(val), 4) self.assertEqual(val, b"\0\0\0\0") - with self.test_session(): + with self.cached_session(): val = ops.convert_to_tensor(b"xx\0xx").eval() self.assertEqual(len(val), 5) self.assertAllEqual(val, b"xx\0xx") nested = [[b"\0\0\0\0", b"xx\0xx"], [b"\0_\0_\0_\0", b"\0"]] - with self.test_session(): + with self.cached_session(): val = ops.convert_to_tensor(nested).eval() # NOTE(mrry): Do not use assertAllEqual, because it converts nested to a # numpy array, which loses the null terminators. @@ -279,7 +279,7 @@ class AsTensorTest(test.TestCase): self.assertTrue(isinstance(x, ops.Tensor)) def testAsTensorForShapeInput(self): - with self.test_session(): + with self.cached_session(): x = ops.convert_to_tensor(tensor_shape.TensorShape([])) self.assertEqual(dtypes_lib.int32, x.dtype) self.assertAllEqual([], x.eval()) @@ -331,7 +331,7 @@ class AsTensorTest(test.TestCase): tensor_shape.TensorShape([1, 2, 3]), dtype=dtypes_lib.float32) def testAsTensorForDimensionInput(self): - with self.test_session(): + with self.cached_session(): x = ops.convert_to_tensor(tensor_shape.TensorShape([1, 2, 3])[1]) self.assertEqual(dtypes_lib.int32, x.dtype) self.assertAllEqual(2, x.eval()) @@ -367,7 +367,7 @@ class IdentityOpTest(test.TestCase): class ZerosTest(test.TestCase): def _Zeros(self, shape): - with self.test_session(): + with self.cached_session(): ret = array_ops.zeros(shape) self.assertEqual(shape, ret.get_shape()) return ret.eval() @@ -379,13 +379,13 @@ class ZerosTest(test.TestCase): def testScalar(self): self.assertEqual(0, self._Zeros([])) self.assertEqual(0, self._Zeros(())) - with self.test_session(): + with 
self.cached_session(): scalar = array_ops.zeros(constant_op.constant([], dtype=dtypes_lib.int32)) self.assertEqual(0, scalar.eval()) def testDynamicSizes(self): np_ans = np.array([[0] * 3] * 2) - with self.test_session(): + with self.cached_session(): # Creates a tensor of 2 x 3. d = array_ops.fill([2, 3], 12., name="fill") # Constructs a tensor of zeros of the same dimensions as "d". @@ -396,7 +396,7 @@ class ZerosTest(test.TestCase): self.assertShapeEqual(np_ans, z) def testDtype(self): - with self.test_session(): + with self.cached_session(): d = array_ops.fill([2, 3], 12., name="fill") self.assertEqual(d.get_shape(), [2, 3]) # Test default type for both constant size and dynamic size @@ -489,7 +489,7 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeDtype(self): # Make sure zeros_like works even for dtypes that cannot be cast between - with self.test_session(): + with self.cached_session(): shape = (3, 5) dtypes = np.float32, np.complex64 for in_type in dtypes: @@ -533,7 +533,7 @@ class ZerosLikeTest(test.TestCase): class OnesTest(test.TestCase): def _Ones(self, shape): - with self.test_session(): + with self.cached_session(): ret = array_ops.ones(shape) self.assertEqual(shape, ret.get_shape()) return ret.eval() @@ -544,13 +544,13 @@ class OnesTest(test.TestCase): def testScalar(self): self.assertEqual(1, self._Ones([])) self.assertEqual(1, self._Ones(())) - with self.test_session(): + with self.cached_session(): scalar = array_ops.ones(constant_op.constant([], dtype=dtypes_lib.int32)) self.assertEqual(1, scalar.eval()) def testDynamicSizes(self): np_ans = np.array([[1] * 3] * 2) - with self.test_session(): + with self.cached_session(): # Creates a tensor of 2 x 3. d = array_ops.fill([2, 3], 12., name="fill") # Constructs a tensor of ones of the same dimensions as "d". 
@@ -561,7 +561,7 @@ class OnesTest(test.TestCase): self.assertShapeEqual(np_ans, z) def testAutoPack(self): - with self.test_session(): + with self.cached_session(): h = array_ops.placeholder(dtypes_lib.int32, shape=[]) w = array_ops.placeholder(dtypes_lib.int32, shape=[]) z = array_ops.ones([h, w]) @@ -569,7 +569,7 @@ class OnesTest(test.TestCase): self.assertAllEqual(out, np.array([[1] * 16] * 4)) def testDtype(self): - with self.test_session(): + with self.cached_session(): d = array_ops.fill([2, 3], 12., name="fill") self.assertEqual(d.get_shape(), [2, 3]) # Test default type for both constant size and dynamic size @@ -606,7 +606,7 @@ class OnesLikeTest(test.TestCase): dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype - with self.test_session(): + with self.cached_session(): # Creates a tensor of non-zero values with shape 2 x 3. d = constant_op.constant( np.ones( @@ -672,7 +672,7 @@ class FillTest(test.TestCase): self.assertAllEqual(np_ans, tf_ans) def testFillNegative(self): - with self.test_session(): + with self.cached_session(): for shape in (-1,), (2, -1), (-1, 2), (-2), (-3): with self.assertRaises(ValueError): array_ops.fill(shape, 7) @@ -703,7 +703,7 @@ class FillTest(test.TestCase): self.assertEqual([None, 17], f.get_shape().as_list()) def testGradient(self): - with self.test_session(): + with self.cached_session(): in_v = constant_op.constant(5.0) out_shape = [3, 2] out_filled = array_ops.fill(out_shape, in_v) @@ -715,7 +715,7 @@ class FillTest(test.TestCase): class PlaceholderTest(test.TestCase): def testDtype(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.float32, shape=(10, 10), name="p") p_identity = array_ops.identity(p) feed_array = np.random.rand(10, 10) @@ -727,7 +727,7 @@ class PlaceholderTest(test.TestCase): p_identity.eval() def testShape(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.float32, shape=(10, 10), name="p") 
p_identity = array_ops.identity(p) feed_array = np.random.rand(10, 10) @@ -744,7 +744,7 @@ class PlaceholderTest(test.TestCase): p_identity.eval(feed_dict={p: feed_array[:5, :5]}) def testUnknownShape(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.float32, shape=None, name="p") p_identity = array_ops.identity(p) # can feed anything @@ -756,13 +756,13 @@ class PlaceholderTest(test.TestCase): p_identity.eval(feed_dict={p: feed_array}), feed_array) def testScalarShape(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.float32, shape=[], name="p") p_identity = array_ops.identity(p) self.assertAllClose(p_identity.eval(feed_dict={p: 5}), 5) def testPartialShape(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.float32, shape=[None, 3], name="p") p_identity = array_ops.identity(p) feed_array = np.random.rand(10, 3) @@ -774,7 +774,7 @@ class PlaceholderTest(test.TestCase): p_identity.eval(feed_dict={p: feed_array[:5, :2]}) def testPartialShapeWhenNotFed(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.float32, shape=[None, 3], name="p") p_identity = array_ops.identity(p) @@ -784,7 +784,7 @@ class PlaceholderTest(test.TestCase): p_identity.eval() def testControlDependency(self): - with self.test_session(): + with self.cached_session(): p = array_ops.placeholder(dtypes_lib.int32, shape=[], name="p") with ops.control_dependencies([p]): c = constant_op.constant(5, dtypes_lib.int32) @@ -872,7 +872,7 @@ versions { """ gdef = graph_pb2.GraphDef() text_format.Merge(graph, gdef) - with self.test_session(): + with self.cached_session(): p, ret = importer.import_graph_def( gdef, return_elements=["Placeholder:0", "add:0"]) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 
374faad7a7..ebeabcfe1a 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -129,7 +129,7 @@ def isum(s, maximum_iterations=None): class ControlFlowTest(test.TestCase): def testRefIdentity(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable(7) v = control_flow_ops._Identity(v) @@ -141,7 +141,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(9, v2.eval()) def testRefEnter(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable(7) enter_v = control_flow_ops._Enter(v, "foo_1", is_constant=True) @@ -154,7 +154,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(9, v3.eval()) def testRefSwitch(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable(7) p = constant_op.constant(True) @@ -164,7 +164,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(9, v2.eval()) def testEnterMulExit(self): - with self.test_session(): + with self.cached_session(): data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") enter_data = gen_control_flow_ops.enter(data, "foo_1", False) five = constant_op.constant(5) @@ -176,7 +176,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.array([x * 5 for x in [1, 2, 3, 4, 5, 6]]), result) def testEnterShapePropagation(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable([0.0, 0.0], dtype=dtypes.float32) # If is_constant=True, the shape information should be propagated. 
@@ -190,7 +190,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(enter_v_non_constant.shape, None) def testSwitchMergeIndexedSlices(self): - with self.test_session(): + with self.cached_session(): values = constant_op.constant([1, 2, 3, 4, 5, 6]) indices = constant_op.constant([0, 2, 4, 6, 8, 10]) data = ops.IndexedSlices(values, indices) @@ -204,7 +204,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.arange(0, 12, 2), ind) def testSwitchDeadBranch(self): - with self.test_session(): + with self.cached_session(): data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") ports = ops.convert_to_tensor(True, name="ports") switch_op = control_flow_ops.switch(data, ports) @@ -216,7 +216,7 @@ class ControlFlowTest(test.TestCase): dead_branch.eval() def testSwitchMergeLess(self): - with self.test_session(): + with self.cached_session(): data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") zero = ops.convert_to_tensor(0) one = ops.convert_to_tensor(1) @@ -228,7 +228,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.arange(1, 7), result) def testSwitchMergeAddIdentity(self): - with self.test_session(): + with self.cached_session(): data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") ports = ops.convert_to_tensor(False, name="ports") switch_op = control_flow_ops.switch(data, ports) @@ -241,7 +241,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.array([x + 1 for x in [1, 2, 3, 4, 5, 6]]), result) def testSwitchMergeAddMul(self): - with self.test_session(): + with self.cached_session(): data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data") ports = ops.convert_to_tensor(True, name="ports") switch_op = control_flow_ops.switch(data, ports) @@ -255,7 +255,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.array([x * 5 for x in [1, 2, 3, 4, 5, 6]]), result) def testLoop_false(self): - with self.test_session(): + with self.cached_session(): false = ops.convert_to_tensor(False) n = 
constant_op.constant(10) @@ -272,7 +272,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, result) def testLoop_1(self): - with self.test_session(): + with self.cached_session(): zero = constant_op.constant(0) one = constant_op.constant(1) n = constant_op.constant(10) @@ -298,7 +298,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, result) def testLoop_2(self): - with self.test_session(): + with self.cached_session(): zero = constant_op.constant(0) one = constant_op.constant(1) n = constant_op.constant(10) @@ -324,7 +324,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, result) def testDifferentFrame(self): - with self.test_session(): + with self.cached_session(): data = array_ops.placeholder(dtypes.float32, shape=[]) enter_1 = gen_control_flow_ops.enter(data, "foo_1", False) enter_2 = gen_control_flow_ops.enter(data, "foo_2", False) @@ -352,7 +352,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual([None], grad) def testFetchable(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = array_ops.placeholder(dtypes.float32) control_flow_ops.cond( constant_op.constant(True), lambda: x + 2, lambda: x + 0) @@ -367,7 +367,7 @@ class ControlFlowTest(test.TestCase): sess.run(t, feed_dict={x: 3}) def testFeedable(self): - with self.test_session() as sess: + with self.cached_session() as sess: c = constant_op.constant(2) i0 = constant_op.constant(0) r = control_flow_ops.while_loop(lambda i: i < 1000, @@ -387,7 +387,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113296180") - with self.test_session(): + with self.cached_session(): values = constant_op.constant(10) indices = constant_op.constant(0) x = ops.IndexedSlices(values, indices) @@ -405,7 +405,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113296161 (SparseTensors)") - with self.test_session(): + with 
self.cached_session(): values = constant_op.constant([2.0, 4.0], name="values") indices = constant_op.constant( [[0], [3]], dtype=dtypes.int64, name="indices") @@ -425,7 +425,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): rv = resource_variable_ops.ResourceVariable(True) variables.global_variables_initializer().run() t = ops.convert_to_tensor(1.0) @@ -441,7 +441,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113293074") - with self.test_session(): + with self.cached_session(): values = constant_op.constant(10) i_32 = ops.convert_to_tensor(0, name="one", dtype=dtypes.int32) i_64 = ops.convert_to_tensor(0, name="one", dtype=dtypes.int64) @@ -494,7 +494,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): x = constant_op.constant(10) r = control_flow_ops.cond( math_ops.less(1, 0), lambda: math_ops.add(x, 1), @@ -506,7 +506,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): x = constant_op.constant(10) pred = math_ops.less(1, 2) fn1 = lambda: math_ops.add(x, 1) @@ -521,7 +521,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113324949 (ref vars)") - with self.test_session(): + with self.cached_session(): v1 = variables.Variable(7) v2 = variables.Variable(7) v3 = variables.Variable(7) @@ -542,7 +542,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(7, v3.eval()) def testCond_5(self): - with self.test_session(): + with self.cached_session(): alive = constant_op.constant(True, name="alive") count = constant_op.constant(0, 
name="count") @@ -559,7 +559,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): v1 = variables.Variable([7]) age = constant_op.constant(3) @@ -573,7 +573,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.array([7]), result) def testCond_7(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = constant_op.constant(10) y = constant_op.constant(200) pred = math_ops.less(1, 2) @@ -586,7 +586,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): x = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, @@ -602,7 +602,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/79881896") - with self.test_session() as sess: + with self.cached_session(): control_holder = array_ops.placeholder(dtypes.float32, shape=()) a = constant_op.constant(3) @@ -617,7 +617,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(5, r.eval()) def testUninitializedRefIdentity(self): - with self.test_session() as sess: + with self.cached_session() as sess: v = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, @@ -689,11 +689,11 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.cond(pred, fn1, fn2) grad = gradients_impl.gradients(r, [x])[0] - with self.test_session(): + with self.cached_session(): self.assertAllEqual(1.0, grad.eval()) def testCondGrad_2(self): - with self.test_session(): + with self.cached_session(): c = array_ops.placeholder(dtypes.int32, shape=[]) x = constant_op.constant(10.0) pred = math_ops.less(c, 2) @@ -709,7 +709,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/110550782 (gradient w.r.t external variable)") - with 
self.test_session(): + with self.cached_session(): c = array_ops.placeholder(dtypes.int32, shape=[]) ox = constant_op.constant(10.0) pred = math_ops.less(c, 2) @@ -726,7 +726,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(30.0, r.eval(feed_dict={c: 3})) def testNestedCond_Simple(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant(0., name="X") y = control_flow_ops.cond( constant_op.constant(True), lambda: x, @@ -744,7 +744,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113327884") - with self.test_session() as sess: + with self.cached_session() as sess: v1 = variables.Variable([1.0, 42.0]) c = array_ops.placeholder(dtypes.int32, shape=[]) pred = math_ops.less(c, 2) @@ -768,7 +768,7 @@ class ControlFlowTest(test.TestCase): # Microbenchmark: 256,000 iterations/s. def testWhile_1(self): - with self.test_session(): + with self.cached_session(): n = constant_op.constant(0) c = lambda x: math_ops.less(x, 10000) b = lambda x: math_ops.add(x, 1) @@ -776,7 +776,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(10000, r.eval()) def testWhileExternalControlDependencies(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable(0.0) v.initializer.run() increment = v.assign_add(1.0) @@ -791,7 +791,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(v.eval(), 1.0) def testWhileExternalControlDependenciesNoInput(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable(0.0) v.initializer.run() increment = v.assign_add(1.0) @@ -806,7 +806,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(v.eval(), 1.0) def testWhileWithRefs_1(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = variables.Variable(0)._ref() # pylint: disable=protected-access i = constant_op.constant(0) c = lambda i, x: math_ops.less(i, 100) @@ -830,19 +830,19 @@ class 
ControlFlowTest(test.TestCase): self.assertEqual(0, value_x) def testWhile_2(self): - with self.test_session(): + with self.cached_session(): s = constant_op.constant(0) r = isum(s) self.assertAllEqual(45, r.eval()) def testWhileWithMaximumIterations(self): - with self.test_session(): + with self.cached_session(): s = constant_op.constant([1, 2, 3, 4, 5]) r = isum(s, maximum_iterations=3) self.assertAllEqual([1 + 3, 2 + 3, 3 + 3, 4 + 3, 5 + 3], r.eval()) def testWhileWithMaximumIterationsAndSingleArgument(self): - with self.test_session(): + with self.cached_session(): r = control_flow_ops.while_loop( lambda i: i < 3, lambda i: i + 1, [0], maximum_iterations=1) self.assertEqual(1, r.eval()) @@ -1019,7 +1019,7 @@ class ControlFlowTest(test.TestCase): # Have more than 10 parallel iterations and hence exercise k-bound # most of the time. def testWhile_3(self): - with self.test_session(): + with self.cached_session(): def compute(i, m, c, o): m, c = [math_ops.add(m, 1), math_ops.add(c, 1)] @@ -1039,7 +1039,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10100, result) def testWhile_4(self): - with self.test_session(): + with self.cached_session(): def compute(i, m, c, o): m, c = [array_ops.gather(x, i), array_ops.gather(x, i)] @@ -1060,7 +1060,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(42, result) def testWhile_5(self): - with self.test_session(): + with self.cached_session(): def compute(i, c, o): c = array_ops.strided_slice(x, array_ops.expand_dims(i, 0), @@ -1088,7 +1088,7 @@ class ControlFlowTest(test.TestCase): trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() - with self.test_session() as sess: + with self.cached_session() as sess: with ops.device("/cpu:0"): c = constant_op.constant(2) i0 = constant_op.constant(0) @@ -1134,7 +1134,7 @@ class ControlFlowTest(test.TestCase): self._testWhile_Gpu_1(use_gpu=True) def testWhileShape(self): - with self.test_session(): + with self.cached_session(): i 
= constant_op.constant(0) m = array_ops.ones([2, 2]) c = lambda i, j: math_ops.less(i, 2) @@ -1151,7 +1151,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(np.ones((8, 8)), r.eval()) def testWhileWithNonTensorInput_Scalar(self): - with self.test_session(): + with self.cached_session(): n = 0 c = lambda x: x < 10000 b = lambda x: x + 1 @@ -1159,7 +1159,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(10000, r.eval()) def testWhileWithNonTensorInput_Vector(self): - with self.test_session(): + with self.cached_session(): n = np.array([0]) # Note, [0] would not work here; that is a list c = lambda x: x[0] < 10000 b = lambda x: array_ops.stack([x[0] + 1]) @@ -1167,7 +1167,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual([10000], r.eval()) def testWhileShapeInference(self): - with self.test_session(): + with self.cached_session(): i = constant_op.constant(0) m = array_ops.ones([2, 2]) c = lambda i, j: math_ops.less(i, 2) @@ -1192,7 +1192,7 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop(c, b, [i, m]) def testWhileShapeInferenceSparseTensor(self): - with self.test_session(): + with self.cached_session(): values = constant_op.constant([2.0, 4.0], name="values") indices = constant_op.constant( [[0], [3]], dtype=dtypes.int64, name="indices") @@ -1223,7 +1223,7 @@ class ControlFlowTest(test.TestCase): [i.get_shape(), tensor_shape.TensorShape([5])]) def testWhileShapeInferenceIndexedSlices(self): - with self.test_session(): + with self.cached_session(): values = constant_op.constant([[2.0, 4.0], [3.0, 5.0]], name="values") indices = constant_op.constant([0, 3], name="indices") shape = constant_op.constant([10, 2], name="dense_shape") @@ -1313,7 +1313,7 @@ class ControlFlowTest(test.TestCase): self._testNestedWhile_2(use_gpu=True) def testWhileWithControl_1(self): - with self.test_session(): + with self.cached_session(): n = constant_op.constant(0) r = constant_op.constant(0) condition = lambda n_, r_: 
math_ops.less(n_, 10) @@ -1329,7 +1329,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(12, res[1].eval()) def testWhileWithControl_2(self): - with self.test_session(): + with self.cached_session(): r = constant_op.constant(0) condition = lambda r_: math_ops.less(r_, 10) @@ -1343,7 +1343,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(12, res.eval()) def testWhileWithControl_3(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = array_ops.placeholder(dtypes.bool) c = constant_op.constant(1) x0 = constant_op.constant(0) @@ -1352,7 +1352,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(10, sess.run(r, {b: True})) def testWhileWithControl_4(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = array_ops.placeholder(dtypes.bool) c = constant_op.constant(1) x0 = constant_op.constant(0) @@ -1362,7 +1362,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(10, sess.run(r, {b: True})) def testWhileWithControl_5(self): - with self.test_session() as sess: + with self.cached_session() as sess: b = array_ops.placeholder(dtypes.bool) c = constant_op.constant(1) x0 = constant_op.constant(0) @@ -1380,7 +1380,7 @@ class ControlFlowTest(test.TestCase): # Ensure that no control edges by an outer control dependency context are # added to nodes inside cond/while contexts. 
- with self.test_session() as sess: + with self.cached_session() as sess: const_true = lambda: constant_op.constant(True) const_false = lambda: constant_op.constant(False) cond = lambda i: control_flow_ops.cond(i > 0, const_true, const_false) @@ -1395,7 +1395,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113324949 (ref vars)") - with self.test_session(): + with self.cached_session(): v = variable_scope.get_variable( "v", [], initializer=init_ops.constant_initializer(2)) i0 = constant_op.constant(0) @@ -1420,7 +1420,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294340 (enable while_v2)") - with self.test_session(): + with self.cached_session(): v = variables.Variable(1) def false_branch(): @@ -1446,7 +1446,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): n = ops.convert_to_tensor(0, name="n") c = lambda x: math_ops.less(x, 10) b = lambda x: math_ops.add(x, 1) @@ -1459,7 +1459,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): n = ops.convert_to_tensor(0) c = lambda x: math_ops.less(x, 10) b = lambda x: math_ops.add(x, 1) @@ -1501,7 +1501,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") - with self.test_session(): + with self.cached_session(): i = ops.convert_to_tensor(0, name="i") n = ops.convert_to_tensor(10, name="n") one = ops.convert_to_tensor(1, name="one") @@ -1519,7 +1519,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") - with self.test_session(): + with self.cached_session(): n = 
ops.convert_to_tensor(0, name="n") c = lambda x: math_ops.less(x, 10) b = lambda x: control_flow_ops.cond(constant_op.constant(True), lambda: math_ops.add(x, 1), lambda: n) @@ -1530,7 +1530,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") - with self.test_session(): + with self.cached_session(): n = ops.convert_to_tensor(0) c = lambda x: math_ops.less(x, 10) # pylint: disable=undefined-variable @@ -1544,7 +1544,7 @@ class ControlFlowTest(test.TestCase): # NOTE: It is ok to have parallel_iterations > 1 def testWhileUpdateVariable_1(self): - with self.test_session(): + with self.cached_session(): select = variables.Variable([3.0, 4.0, 5.0]) n = constant_op.constant(0) @@ -1566,7 +1566,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(np.array([10.0, 10.0, 10.0]), result) def testWhileUpdateVariable_2(self): - with self.test_session(): + with self.cached_session(): select1 = variables.Variable([3.0, 4.0, 5.0]) select2 = variables.Variable([3.0, 4.0, 5.0]) n = constant_op.constant(0) @@ -1592,7 +1592,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(np.array([10.0, 10.0, 10.0]), result2) def testWhileUpdateVariable_3(self): - with self.test_session(): + with self.cached_session(): select = variables.Variable([3.0, 4.0, 5.0]) n = constant_op.constant(0) @@ -1614,7 +1614,7 @@ class ControlFlowTest(test.TestCase): # b/24814703 def testWhileUpdateVariable_4(self): - with self.test_session(): + with self.cached_session(): var_a = variables.Variable(0, name="a") var_b = variables.Variable(0, name="b") variables.global_variables_initializer().run() @@ -1642,7 +1642,7 @@ class ControlFlowTest(test.TestCase): # b/24736492 def testWhileUpdateVariable_5(self): - with self.test_session(): + with self.cached_session(): # Create some variables. 
var_a = variables.Variable(0, name="a") var_b = variables.Variable(0, name="b") @@ -1672,7 +1672,7 @@ class ControlFlowTest(test.TestCase): # b/24814668 def testWhileUpdateVariable_6(self): - with self.test_session(): + with self.cached_session(): # Create some variables. var_a = variables.Variable(0, name="a") var_b = variables.Variable(0, name="b") @@ -1701,7 +1701,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(10, var_a.eval()) def testWhileQueue_1(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(-1, dtypes.int32) i = constant_op.constant(0) @@ -1719,7 +1719,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual([i], q.dequeue().eval()) def testWhileStack_1(self): - with self.test_session(): + with self.cached_session(): s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo") i = constant_op.constant(0) @@ -1791,7 +1791,7 @@ class ControlFlowTest(test.TestCase): self._testWhileGrad_ColocateGradients(colocate=True) def testWhileGrad_Square(self): - with self.test_session(): + with self.cached_session(): v = constant_op.constant(2.0, name="v") c = lambda v: math_ops.less(v, 100.0) b = math_ops.square @@ -1802,7 +1802,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(1024.0, r.eval()) def testWhileGrad_Shape(self): - with self.test_session(): + with self.cached_session(): x = array_ops.placeholder(dtypes.float32, shape=[None]) v = constant_op.constant([2.0], name="v") n = constant_op.constant(0, name="n") @@ -1819,7 +1819,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose([810.0, 2560.0], r.eval(feed_dict={x: [3.0, 4.0]})) def testWhileGrad_BaseShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = array_ops.placeholder(dtypes.float32, [None]) v0 = constant_op.constant([2.0, 2.0], name="v") c = lambda v: constant_op.constant(False) @@ -1831,7 +1831,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose([2.0, 4.0], sess.run(r, 
feed_dict={x: [1.0, 2.0]})) def testWhileGrad_MultipleUses(self): - with self.test_session(): + with self.cached_session(): v = constant_op.constant(2.0, name="v") c = lambda v: math_ops.less(v, 100.0) b = math_ops.square @@ -1842,7 +1842,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(524288.0, r.eval()) def testWhileGrad_LoopAdd(self): - with self.test_session(): + with self.cached_session(): v = constant_op.constant(2.0, name="v") c = lambda v: math_ops.less(v, 100.0) b = math_ops.square @@ -1901,7 +1901,7 @@ class ControlFlowTest(test.TestCase): self._testNestedWhileCondWhileGrad(use_gpu=True) def testWhileGrad_Variable(self): - with self.test_session(): + with self.cached_session(): a = variables.Variable(3.0) v = constant_op.constant(2.0, name="v") c = lambda v: math_ops.less(v, 100.0) @@ -1916,7 +1916,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/110550782 (gradient w.r.t external variable)") - with self.test_session(): + with self.cached_session(): n = ops.convert_to_tensor(1.0, name="n") x = array_ops.placeholder(dtypes.float32, shape=None) c = lambda n: math_ops.less(n, 10.0) @@ -1931,7 +1931,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0})) def testGradInWhileWrtInitialLoopVal(self): - with self.test_session(): + with self.cached_session(): x = array_ops.placeholder(dtypes.float32, shape=(), name="x") y = x + 1 @@ -1948,7 +1948,7 @@ class ControlFlowTest(test.TestCase): control_flow_ops.while_loop(lambda i, x: i < 3, body, [0, y]) def testWhileGradInWhile(self): - with self.test_session(): + with self.cached_session(): n = ops.convert_to_tensor(1.0, name="n") x = array_ops.placeholder(dtypes.float32, shape=None) c = lambda n: math_ops.less(n, 10.0) @@ -1978,13 +1978,13 @@ class ControlFlowTest(test.TestCase): i, x = control_flow_ops.while_loop(lambda i, x: i < 3, outer_body, [0, 0.0]) - with self.test_session() as sess: + with 
self.cached_session() as sess: i_val, x_val = sess.run([i, x]) self.assertEqual(i_val, 3) self.assertAllClose(x_val, 1.0) def testWhile_NestedInput(self): - with self.test_session() as sess: + with self.cached_session() as sess: named = collections.namedtuple("named", ("a", "b")) loop_vars = [ named(a=constant_op.constant(0.0), b=constant_op.constant(1.0)), @@ -2011,7 +2011,7 @@ class ControlFlowTest(test.TestCase): sess.run(r_flattened)) def testWhile_NestedBadArityFails(self): - with self.test_session(): + with self.cached_session(): named = collections.namedtuple("named", ("a", "b")) loop_vars = [ named(a=constant_op.constant(0.0), b=constant_op.constant(1.0)), @@ -2027,7 +2027,7 @@ class ControlFlowTest(test.TestCase): control_flow_ops.while_loop(c, b, loop_vars) def testWhileGrad_ys_xs(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant(3.0, name="x") y = constant_op.constant(2.0, name="y") @@ -2050,7 +2050,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(120.0, r[0].eval()) def testWhileGrad_Dependency(self): - with self.test_session(): + with self.cached_session(): i = constant_op.constant(0, name="i") x = constant_op.constant(2.0, name="x") @@ -2069,7 +2069,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(1024.0, r[0].eval()) def testWhileGrad_NoGradient(self): - with self.test_session(): + with self.cached_session(): v = constant_op.constant(2.0, name="v") c = lambda v: math_ops.less(v, 100.0) b = math_ops.square @@ -2079,7 +2079,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(1.0, r[0].eval()) def testWhileGrad_NoDependency(self): - with self.test_session() as sess: + with self.cached_session() as sess: variable = variables.Variable(array_ops.ones([2, 3])) duration = array_ops.zeros([], dtype=dtypes.int32) @@ -2099,7 +2099,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(np.ones([2, 3]), sess.run(grad[0])) def testWhileGrad_Const(self): - with 
self.test_session() as sess: + with self.cached_session() as sess: c0 = constant_op.constant(0.0, name="c0") c1 = constant_op.constant(1.0, name="c1") duration = constant_op.constant(0, name="t") @@ -2118,7 +2118,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(0.0, sess.run(grad[0])) def testWhileGrad_SerialTwoLoops(self): - with self.test_session(): + with self.cached_session(): i = constant_op.constant(0, name="i") x = constant_op.constant(2.0, name="x") @@ -2136,7 +2136,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(1024.0, r[0].eval()) def testWhileGrad_ParallelTwoLoops(self): - with self.test_session(): + with self.cached_session(): i = constant_op.constant(0, name="i") x = constant_op.constant(2.0, name="x") @@ -2155,7 +2155,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(64.0, r[0].eval()) def testWhileGrad_OneOutputWithControlDependencyOnSecond(self): - with self.test_session(): + with self.cached_session(): i = constant_op.constant(0, name="i") x = constant_op.constant(1.0, name="x") y = constant_op.constant(1.0, name="y") @@ -2196,7 +2196,7 @@ class ControlFlowTest(test.TestCase): self._testNestedWhileGrad_Simple(use_gpu=True) def testNestedWhileGrad_SerialInner(self): - with self.test_session(): + with self.cached_session(): v = constant_op.constant(1.0) def inner_loop1(s): @@ -2219,7 +2219,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(256.0, r.eval()) def testNestedWhileGrad_ParallelInner(self): - with self.test_session(): + with self.cached_session(): v = constant_op.constant(1.0) def inner_loop1(s): @@ -2244,7 +2244,7 @@ class ControlFlowTest(test.TestCase): def testNestedWhileGrad_ParallelIterations(self): # Make sure the stack pushes and pops of an inner loop are executed in # the sequential order of the iterations of its outer loop. 
- with self.test_session() as sess: + with self.cached_session() as sess: def inner_loop(t): fn = lambda n: n + math_ops.square(var) @@ -2287,7 +2287,7 @@ class ControlFlowTest(test.TestCase): self._testWhileCondGrad_Simple(use_gpu=True) def testWhileCondGrad_UnknownShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: v = array_ops.placeholder(dtypes.float32) n = ops.convert_to_tensor(100.0, name="n") one = ops.convert_to_tensor(1.0, name="one") @@ -2304,7 +2304,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(1024.0, r) def testWhileGrad_Concat(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = variable_scope.get_variable("x", initializer=[[1., 2.]]) i0 = constant_op.constant(0) h0 = array_ops.zeros([0, 2]) @@ -2327,7 +2327,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose([[0.98000002, 1.98000002]], sess.run(x)) def testWhileWithRefsWithGradients_1(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = variables.Variable(0.)._ref() # pylint: disable=protected-access i = constant_op.constant(0) c = lambda i, x: math_ops.less(i, 10) @@ -2355,7 +2355,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(73, value_x_grad) def testWhileGrad_IndexedSlices(self): - with self.test_session(): + with self.cached_session(): values = constant_op.constant([2.0, 4.0], name="values") indices = constant_op.constant([0, 3], name="indices") shape = constant_op.constant([10], name="dense_shape") @@ -2376,7 +2376,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(np.array([1024.0, 1024.0]), r.eval()) def testWhileGrad_SparseTensor(self): - with self.test_session(): + with self.cached_session(): values = constant_op.constant([2.0, 4.0], name="values") indices = constant_op.constant( [[0], [3]], dtype=dtypes.int64, name="indices") @@ -2398,7 +2398,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(np.array([1024.0, 1024.0]), 
r.eval()) def testCallGradInLoop(self): - with self.test_session() as sess: + with self.cached_session() as sess: i0 = constant_op.constant(0) params = constant_op.constant(5.0) params_1 = math_ops.square(params) @@ -2417,7 +2417,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(600.0, sess.run(output_grad)[1]) def testWhileAndTensorArray(self): - with self.test_session() as sess: + with self.cached_session() as sess: param = constant_op.constant(2.0) n0 = constant_op.constant(0) y0 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="elems") @@ -2436,7 +2436,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(107520.0, sess.run(r)) def testWhileGrad_StopGrad(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant(3.0, name="x") y = constant_op.constant(2.0, name="y") @@ -2479,7 +2479,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(32.0, r.eval()) def testWhileGrad_StopGradInside(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant(3.0, name="x") y = constant_op.constant(2.0, name="y") @@ -2498,7 +2498,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(156.0, r.eval()) def testWhileGrad_StopGradInsideNoShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = array_ops.placeholder(dtypes.float32) y = array_ops.placeholder(dtypes.float32) @@ -2534,7 +2534,7 @@ class ControlFlowTest(test.TestCase): gradients_impl.gradients(grad_theta_stopped, theta) def testStopGradOnWhileGrad(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant(2.0, name="x") y = constant_op.constant(2.0, name="y") @@ -2562,7 +2562,7 @@ class ControlFlowTest(test.TestCase): _, y = control_flow_ops.while_loop(cond, body, (math_ops.argmin(q), 0.)) dy_dq, = gradients_impl.gradients(y, q) self.assertIsNotNone(dy_dq) - with self.test_session() as sess: + with self.cached_session() as sess: 
sess.run(q.initializer) self.assertAllClose([0., 0.], sess.run(dy_dq)) @@ -2579,7 +2579,7 @@ class ControlFlowTest(test.TestCase): _, y = control_flow_ops.while_loop(cond, body, (math_ops.argmin(q), 0.)) dy_dq, = gradients_impl.gradients(y, q) self.assertIsNotNone(dy_dq) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(q.initializer) self.assertAllClose([1., 1.], sess.run(dy_dq)) @@ -2607,7 +2607,7 @@ class ControlFlowTest(test.TestCase): self.assertIsNotNone(grad) def testStopGradMultiFlows(self): - with self.test_session(): + with self.cached_session(): def body(i, y, r): x = variable_scope.get_variable( @@ -2636,7 +2636,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): c = array_ops.placeholder(dtypes.int32, shape=[]) one = ops.convert_to_tensor(1, name="one") two = ops.convert_to_tensor(2, name="two") @@ -2654,7 +2654,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") - with self.test_session(): + with self.cached_session(): x = ops.convert_to_tensor([-2.0, 2.0], name="x") d = array_ops.placeholder(dtypes.int32, shape=[]) @@ -2672,7 +2672,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/112477618 (Operation returned from cond)") - with self.test_session(): + with self.cached_session(): x = constant_op.constant(1) y = constant_op.constant(2) z = constant_op.constant(3) @@ -2727,7 +2727,7 @@ class ControlFlowTest(test.TestCase): if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/112477618 (Operation returned from cond)") - with self.test_session() as sess: + with self.cached_session() as sess: v0 = variables.Variable(-1) v1 = variables.Variable(-1) v2 = variables.Variable(-1) @@ -2765,7 +2765,7 @@ class ControlFlowTest(test.TestCase): if 
control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113324949 (ref vars)") - with self.test_session(): + with self.cached_session(): v = variables.Variable(0) c = ops.convert_to_tensor(0) one = ops.convert_to_tensor(1) @@ -2793,7 +2793,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(2, v.eval()) def testWithOpsDependencies(self): - with self.test_session() as sess: + with self.cached_session() as sess: v = variables.Variable(0.0) c = constant_op.constant(10) @@ -2816,7 +2816,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(0.0, real_v_val) def testWithTensorDependencies(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable(0.0) c1 = constant_op.constant(10) c2 = constant_op.constant(20) @@ -2842,7 +2842,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(0.0, v.eval()) def testWithIndexedSlicesDependencies(self): - with self.test_session(): + with self.cached_session(): v = variables.Variable( np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32)) v_at_1 = ops.IndexedSlices(v, constant_op.constant([1])) @@ -2886,7 +2886,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual([b"loc:@vdef"], with_vdef_dep.op.colocation_groups()) def testGroup(self): - with self.test_session() as sess: + with self.cached_session() as sess: v1 = variables.Variable([0.0]) v2 = variables.Variable([1.0]) @@ -2997,7 +2997,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(None, s.get_shape()) def testRunLoopTensor(self): - with self.test_session() as sess: + with self.cached_session() as sess: tensor_list = [] def condition(t): @@ -3021,7 +3021,7 @@ class ControlFlowTest(test.TestCase): def func(x): return np.square(x) - with self.test_session(): + with self.cached_session(): r = control_flow_ops.while_loop( lambda i, v: i < 4, lambda i, v: [i + 1, script_ops.py_func(func, [v], [dtypes.float32])[0]], @@ -3035,7 +3035,7 @@ class ControlFlowTest(test.TestCase): def func(x): return 
math_ops.square(math_ops.square(x)) - with self.test_session(): + with self.cached_session(): x = constant_op.constant(2.0, dtypes.float32) r = control_flow_ops.while_loop( lambda i, v: i < 2, lambda i, v: [i + 1, func(v)], @@ -3174,7 +3174,7 @@ class TupleTest(test.TestCase): def testTensors(self): for v1_first in [True, False]: - with self.test_session(): + with self.cached_session(): v1 = variables.Variable([1.0]) add1 = math_ops.add( control_flow_ops.with_dependencies([v1.initializer], v1._ref()), # pylint: disable=protected-access @@ -3204,7 +3204,7 @@ class TupleTest(test.TestCase): def testIndexedSlices(self): for v1_first in [True, False]: - with self.test_session(): + with self.cached_session(): v1 = variables.Variable( np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype( np.float32)) @@ -3243,7 +3243,7 @@ class TupleTest(test.TestCase): v1.eval()) def testAcceptTensorsAsControlInputs(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(0) assign = state_ops.assign(var, 1) t, = control_flow_ops.tuple( diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index fcba456004..2d6d8a8051 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -53,7 +53,7 @@ class Conv1DTest(test.TestCase): self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) def testConv1DTranspose(self): - with self.test_session(): + with self.cached_session(): stride = 2 # Input, output: [batch, width, depth] diff --git a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py index be299beee4..644a151710 100644 --- a/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py +++ b/tensorflow/python/kernel_tests/conv2d_backprop_filter_grad_test.py @@ -32,7 +32,7 @@ from tensorflow.python.platform import test class 
Conv2DBackpropFilterGradTest(test.TestCase): def testGradient(self): - with self.test_session(): + with self.cached_session(): for padding in ["SAME", "VALID"]: for stride in [1, 2]: np.random.seed(1) diff --git a/tensorflow/python/kernel_tests/conv2d_transpose_test.py b/tensorflow/python/kernel_tests/conv2d_transpose_test.py index 27804be65c..cbdd2c5991 100644 --- a/tensorflow/python/kernel_tests/conv2d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv2d_transpose_test.py @@ -37,7 +37,7 @@ from tensorflow.python.platform import test class Conv2DTransposeTest(test.TestCase): def testConv2DTransposeSingleStride(self): - with self.test_session(): + with self.cached_session(): strides = [1, 1, 1, 1] # Input, output: [batch, height, width, depth] @@ -75,7 +75,7 @@ class Conv2DTransposeTest(test.TestCase): self.assertAllClose(target, value[n, h, w, k]) def testConv2DTransposeSame(self): - with self.test_session(): + with self.cached_session(): strides = [1, 2, 2, 1] # Input, output: [batch, height, width, depth] @@ -108,7 +108,7 @@ class Conv2DTransposeTest(test.TestCase): self.assertAllClose(target, value[n, h, w, k]) def testConv2DTransposeValid(self): - with self.test_session(): + with self.cached_session(): strides = [1, 2, 2, 1] # Input, output: [batch, height, width, depth] @@ -163,7 +163,7 @@ class Conv2DTransposeTest(test.TestCase): np.random.seed(1) # Make it reproducible. 
x_val = np.random.random_sample(x_shape).astype(np.float64) f_val = np.random.random_sample(f_shape).astype(np.float64) - with self.test_session(): + with self.cached_session(): x = constant_op.constant(x_val, name="x", dtype=dtypes.float32) f = constant_op.constant(f_val, name="f", dtype=dtypes.float32) output = nn_ops.conv2d_transpose( diff --git a/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py b/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py index 85264ef876..89b64068ac 100644 --- a/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py +++ b/tensorflow/python/kernel_tests/conv3d_backprop_filter_v2_grad_test.py @@ -32,7 +32,7 @@ from tensorflow.python.platform import test class Conv3DBackpropFilterV2GradTest(test.TestCase): def testGradient(self): - with self.test_session(): + with self.cached_session(): for padding in ["SAME", "VALID"]: for stride in [1, 2]: np.random.seed(1) diff --git a/tensorflow/python/kernel_tests/conv3d_transpose_test.py b/tensorflow/python/kernel_tests/conv3d_transpose_test.py index 289ae29fce..2527b83769 100644 --- a/tensorflow/python/kernel_tests/conv3d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv3d_transpose_test.py @@ -32,7 +32,7 @@ from tensorflow.python.platform import test class Conv3DTransposeTest(test.TestCase): def testConv3DTransposeSingleStride(self): - with self.test_session(): + with self.cached_session(): strides = [1, 1, 1, 1, 1] # Input, output: [batch, depth, height, width, channel] @@ -82,7 +82,7 @@ class Conv3DTransposeTest(test.TestCase): self.assertAllClose(target, value[n, d, h, w, k]) def testConv3DTransposeSame(self): - with self.test_session(): + with self.cached_session(): strides = [1, 2, 2, 2, 1] # Input, output: [batch, depth, height, width, depth] @@ -134,7 +134,7 @@ class Conv3DTransposeTest(test.TestCase): def testConv3DTransposeOutputShapeType(self): # Test case for GitHub issue 18887 for dtype in [dtypes.int32, dtypes.int64]: - 
with self.test_session(): + with self.cached_session(): x_shape = [2, 5, 6, 4, 3] y_shape = [2, 5, 6, 4, 2] f_shape = [3, 3, 3, 2, 3] @@ -149,7 +149,7 @@ class Conv3DTransposeTest(test.TestCase): output.eval() def testConv3DTransposeValid(self): - with self.test_session(): + with self.cached_session(): strides = [1, 2, 2, 2, 1] # Input, output: [batch, depth, height, width, depth] @@ -209,7 +209,7 @@ class Conv3DTransposeTest(test.TestCase): np.random.seed(1) # Make it reproducible. x_val = np.random.random_sample(x_shape).astype(np.float64) f_val = np.random.random_sample(f_shape).astype(np.float64) - with self.test_session(): + with self.cached_session(): x = constant_op.constant(x_val, name="x", dtype=dtypes.float32) f = constant_op.constant(f_val, name="f", dtype=dtypes.float32) output = nn_ops.conv3d_transpose( diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 0b531125f3..6794464e3a 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -108,7 +108,7 @@ class Conv3DTest(test.TestCase): use_gpu=use_gpu) results.append(result) - with self.test_session() as sess: + with self.cached_session() as sess: values = sess.run(results) for value in values: print("expected = ", expected) @@ -183,7 +183,7 @@ class Conv3DTest(test.TestCase): expected_results.append(expected) computed_results.append(computed) tolerance = 1e-2 if use_gpu else 1e-5 - with self.test_session() as sess: + with self.cached_session() as sess: expected_values = sess.run(expected_results) computed_values = sess.run(computed_results) for e_value, c_value in zip(expected_values, computed_values): diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 00de94f004..ea611497d9 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -1474,7 
+1474,7 @@ class Conv2DTest(test.TestCase): padding="SAME") def testOpEdgeCases(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Illegal strides. with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "strides in the batch and depth"): @@ -1539,7 +1539,7 @@ class DepthwiseConv2DTest(test.TestCase): # numbers from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] - with self.test_session() as sess: + with self.cached_session() as sess: t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) diff --git a/tensorflow/python/kernel_tests/cross_grad_test.py b/tensorflow/python/kernel_tests/cross_grad_test.py index f040ac6055..0bd4006d6a 100644 --- a/tensorflow/python/kernel_tests/cross_grad_test.py +++ b/tensorflow/python/kernel_tests/cross_grad_test.py @@ -27,7 +27,7 @@ from tensorflow.python.platform import test class CrossOpTest(test.TestCase): def testGradientRandomValues(self): - with self.test_session(): + with self.cached_session(): us = [2, 3] u = array_ops.reshape( [0.854, -0.616, 0.767, 0.725, -0.927, 0.159], shape=us) diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index b61232cded..00d7f956c2 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -541,7 +541,7 @@ class UnaryOpTest(test.TestCase): return x for op, real_range in op_range: - with self.test_session(): + with self.cached_session(): for dtype, tol in dtype_tols: x = constant_op.constant(rand(dtype)) y = constant_op.constant(rand(dtype)) @@ -604,7 +604,7 @@ class BinaryOpTest(test.TestCase): numeric_gradient_type=None): z = np_func(x, y) zs = list(z.shape) - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) if x.dtype in 
(np.float32, np.float64): @@ -634,7 +634,7 @@ class BinaryOpTest(test.TestCase): numeric_gradient_type=None): z = np_func(x, y) zs = list(z.shape) - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) if x.dtype in (np.float32, np.float64): @@ -720,7 +720,7 @@ class BinaryOpTest(test.TestCase): def testFloatDifferentShapes(self): x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.float32) y = np.array([1, 2]).reshape(2, 1).astype(np.float32) - with self.test_session() as sess: + with self.cached_session() as sess: inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) s = math_ops.reduce_sum(inx * iny) @@ -736,7 +736,7 @@ class BinaryOpTest(test.TestCase): y = np.array([1, 2]).reshape(2, 1).astype(np.int32) var_x = variables.Variable(x) var_y = variables.Variable(y) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run([var_x.initializer, var_y.initializer]) left_result = (var_x * y).eval() right_result = (x * var_y).eval() @@ -1168,7 +1168,7 @@ class BinaryOpTest(test.TestCase): ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]])) def testZeroPowGrad(self): - with self.test_session(): + with self.cached_session(): for dtype in (np.float16, np.float32, np.float64, np.complex64, np.complex128): x = constant_op.constant(0.0, dtype=dtype) @@ -1178,7 +1178,7 @@ class BinaryOpTest(test.TestCase): self.assertEqual(error, 0) def testComplexPowGrad(self): - with self.test_session(): + with self.cached_session(): for dtype in np.complex64, np.complex128: for base in 2.0, -2.0: x = constant_op.constant(base, dtype=dtype) @@ -1470,7 +1470,7 @@ class SelectOpTest(test.TestCase): self.assertShapeEqual(np_ans, out) def _compareGradientX(self, c, x, y, numeric_gradient_type=None): - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = array_ops.where(c, inx, iny) @@ -1494,7 +1494,7 @@ class 
SelectOpTest(test.TestCase): self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5) def _compareGradientY(self, c, x, y, numeric_gradient_type=None): - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = array_ops.where(c, inx, iny) @@ -1582,7 +1582,7 @@ class SelectOpTest(test.TestCase): x = np.random.rand(1, 3, 0) * 100 y = np.random.rand(1, 3, 0) * 100 z_expected = np.zeros((1, 3, 0), dtype=np.float32) - with self.test_session(): + with self.cached_session(): xt = x.astype(np.float32) yt = y.astype(np.float32) z = array_ops.where(c, xt, yt).eval() @@ -1590,7 +1590,7 @@ class SelectOpTest(test.TestCase): def testNan(self): """Verify that nans don't propagate where they shouldn't.""" - with self.test_session(): + with self.cached_session(): for c in False, True: for a in 7.0, np.nan: for b in 5.0, np.nan: @@ -1614,7 +1614,7 @@ class BatchSelectOpTest(test.TestCase): self.assertShapeEqual(np_ans, out) def _compareGradientX(self, c, x, y, numeric_gradient_type=None): - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = array_ops.where(c, inx, iny) @@ -1638,7 +1638,7 @@ class BatchSelectOpTest(test.TestCase): self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5) def _compareGradientY(self, c, x, y, numeric_gradient_type=None): - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = array_ops.where(c, inx, iny) @@ -1745,7 +1745,7 @@ class MinMaxOpTest(test.TestCase): self._compare(x.astype(t), t(y), use_gpu=True) def _compareGradientX(self, func, x, y): - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = func(inx, iny) @@ -1760,7 +1760,7 @@ class MinMaxOpTest(test.TestCase): self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5) def _compareGradientY(self, func, x, y): 
- with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) iny = ops.convert_to_tensor(y) out = func(inx, iny) @@ -1932,7 +1932,7 @@ class RoundingTest(test.TestCase): def _compare_values(self, x, y=None): y = np.rint(x) if y is None else np.asarray(y) - with self.test_session() as sess: + with self.cached_session() as sess: tf_rint = math_ops.rint(x) np_rint = sess.run(tf_rint) self.assertAllEqual(y, np_rint) @@ -1940,7 +1940,7 @@ class RoundingTest(test.TestCase): def _compare(self, x): np_floor, np_ceil = np.floor(x), np.ceil(x) - with self.test_session() as sess: + with self.cached_session() as sess: inx = ops.convert_to_tensor(x) ofloor, oceil = math_ops.floor(inx), math_ops.ceil(inx) tf_floor, tf_ceil = sess.run([ofloor, oceil]) @@ -2099,7 +2099,7 @@ class ComplexMakeRealImagTest(test.TestCase): # computes the squared sum. This is obviously the same as sum(real # * real) + sum(imag * imag). We just want to make sure the # gradient function is checked. - with self.test_session(): + with self.cached_session(): inx = ops.convert_to_tensor(x) real, imag = array_ops.split(value=inx, num_or_size_splits=2, axis=1) real, imag = array_ops.reshape(real, [-1]), array_ops.reshape(imag, [-1]) @@ -2116,7 +2116,7 @@ class ComplexMakeRealImagTest(test.TestCase): def _compareBroadcastGradient(self, x): x_ = ops.convert_to_tensor(x) epsilon = 1e-3 - with self.test_session(): + with self.cached_session(): for args in [(x_, 0.), (0., x_)]: z = math_ops.reduce_sum(math_ops.abs(math_ops.complex(*args))) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -2136,7 +2136,7 @@ class ComplexMakeRealImagTest(test.TestCase): # data is a float matrix of shape [n, 4]. data[:, 0], data[:, 1], # data[:, 2], data[:, 3] are real parts of x, imaginary parts of # x, real parts of y and imaginary parts of y. 
- with self.test_session(): + with self.cached_session(): inp = ops.convert_to_tensor(data) xr, xi, yr, yi = array_ops.split(value=inp, num_or_size_splits=4, axis=1) @@ -2166,7 +2166,7 @@ class ComplexMakeRealImagTest(test.TestCase): class AccumulateTest(test.TestCase): def testSimple(self): - with self.test_session(): + with self.cached_session(): random_arrays = [ np.random.rand(16, 16, 16, 16).astype(np.float32) for _ in range(20) ] @@ -2181,20 +2181,20 @@ class AccumulateTest(test.TestCase): self.assertAllClose(np_val, tf_val.eval()) def testZeroArgs(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): tf_val = math_ops.accumulate_n([]) tf_val.eval() def testWrongShape(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): a = variables.Variable(0.2) b = variables.Variable(0.1) math_ops.accumulate_n([a, b], shape=[2, 2]) # Should be shape=[] def testWrongType(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) @@ -2202,7 +2202,7 @@ class AccumulateTest(test.TestCase): def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked - with self.test_session(): + with self.cached_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) math_ops.accumulate_n([a], tensor_dtype=np.int32) @@ -2214,7 +2214,7 @@ class PolyvalTest(test.TestCase): x = np.random.rand(2, 2).astype(dtype) coeffs = [np.random.rand(2, 2).astype(dtype) for _ in range(degree + 1)] np_val = np.polyval(coeffs, x) - with self.test_session(): + with self.cached_session(): tf_val = math_ops.polyval(coeffs, x) self.assertAllClose(np_val, tf_val.eval()) @@ -2237,7 +2237,7 @@ class PolyvalTest(test.TestCase): for _ in range(degree + 1) ] np_val = np.polyval(coeffs, x) - with self.test_session(): + 
with self.cached_session(): tf_val = math_ops.polyval(coeffs, x) self.assertAllClose(np_val, tf_val.eval()) @@ -2245,7 +2245,7 @@ class PolyvalTest(test.TestCase): x = np.random.rand(2, 2).astype(np.float32) coeffs = [] np_val = np.polyval(coeffs, x) - with self.test_session(): + with self.cached_session(): tf_val = math_ops.polyval(coeffs, x) self.assertAllClose(np_val, tf_val.eval()) diff --git a/tensorflow/python/kernel_tests/decode_bmp_op_test.py b/tensorflow/python/kernel_tests/decode_bmp_op_test.py index 35f8f76991..eebaffbe13 100644 --- a/tensorflow/python/kernel_tests/decode_bmp_op_test.py +++ b/tensorflow/python/kernel_tests/decode_bmp_op_test.py @@ -60,7 +60,7 @@ class DecodeBmpOpTest(test.TestCase): img_in = constant_op.constant(byte_string, dtype=dtypes.string) decode = array_ops.squeeze(image_ops.decode_bmp(img_in)) - with self.test_session(): + with self.cached_session(): decoded = decode.eval() self.assertAllEqual(decoded, img_bytes) @@ -135,7 +135,7 @@ class DecodeBmpOpTest(test.TestCase): img_in = constant_op.constant(byte_string, dtype=dtypes.string) decode = image_ops.decode_bmp(img_in) - with self.test_session(): + with self.cached_session(): decoded = decode.eval() self.assertAllEqual(decoded, img_bytes) diff --git a/tensorflow/python/kernel_tests/decode_compressed_op_test.py b/tensorflow/python/kernel_tests/decode_compressed_op_test.py index c9bda58ca7..1cc1c7da30 100644 --- a/tensorflow/python/kernel_tests/decode_compressed_op_test.py +++ b/tensorflow/python/kernel_tests/decode_compressed_op_test.py @@ -44,7 +44,7 @@ class DecodeCompressedOpTest(test.TestCase): def testDecompress(self): for compression_type in ["ZLIB", "GZIP", ""]: - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[2]) decompressed = parsing_ops.decode_compressed( in_bytes, compression_type=compression_type) @@ -57,7 +57,7 @@ class DecodeCompressedOpTest(test.TestCase): def testDecompressWithRaw(self): for 
compression_type in ["ZLIB", "GZIP", ""]: - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) decompressed = parsing_ops.decode_compressed( in_bytes, compression_type=compression_type) diff --git a/tensorflow/python/kernel_tests/decode_csv_op_test.py b/tensorflow/python/kernel_tests/decode_csv_op_test.py index 4f49d72676..40b17a11f8 100644 --- a/tensorflow/python/kernel_tests/decode_csv_op_test.py +++ b/tensorflow/python/kernel_tests/decode_csv_op_test.py @@ -27,7 +27,7 @@ from tensorflow.python.platform import test class DecodeCSVOpTest(test.TestCase): def _test(self, args, expected_out=None, expected_err_re=None): - with self.test_session() as sess: + with self.cached_session() as sess: decode = parsing_ops.decode_csv(**args) if expected_err_re is None: diff --git a/tensorflow/python/kernel_tests/decode_image_op_test.py b/tensorflow/python/kernel_tests/decode_image_op_test.py index 58280432d6..7f73fbaa84 100644 --- a/tensorflow/python/kernel_tests/decode_image_op_test.py +++ b/tensorflow/python/kernel_tests/decode_image_op_test.py @@ -111,7 +111,7 @@ class DecodeImageOpTest(test.TestCase): def testInvalidBytes(self): image_bytes = b"ThisIsNotAnImage!" 
decode = image_ops.decode_image(image_bytes) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): decode.eval() diff --git a/tensorflow/python/kernel_tests/decode_png_op_test.py b/tensorflow/python/kernel_tests/decode_png_op_test.py index d2e03938ee..8f36343667 100644 --- a/tensorflow/python/kernel_tests/decode_png_op_test.py +++ b/tensorflow/python/kernel_tests/decode_png_op_test.py @@ -46,7 +46,7 @@ class DecodePngOpTest(test.TestCase): image_ops.decode_png( img_in, dtype=dtypes.uint16)) - with self.test_session(): + with self.cached_session(): decoded = decode.eval() self.assertAllEqual(decoded, img_bytes) diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py index 122a9ed469..dc01f4196a 100644 --- a/tensorflow/python/kernel_tests/decode_raw_op_test.py +++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py @@ -29,7 +29,7 @@ from tensorflow.python.platform import test class DecodeRawOpTest(test.TestCase): def testToUint8(self): - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[2]) decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.uint8) self.assertEqual([2, None], decode.get_shape().as_list()) @@ -47,7 +47,7 @@ class DecodeRawOpTest(test.TestCase): decode.eval(feed_dict={in_bytes: ["short", "longer"]}) def testToInt16(self): - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.int16) self.assertEqual([None, None], decode.get_shape().as_list()) @@ -62,7 +62,7 @@ class DecodeRawOpTest(test.TestCase): decode.eval(feed_dict={in_bytes: ["123", "456"]}) def testEndianness(self): - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) decode_le = parsing_ops.decode_raw( in_bytes, 
out_type=dtypes.int32, little_endian=True) @@ -74,7 +74,7 @@ class DecodeRawOpTest(test.TestCase): self.assertAllEqual([[0x01020304]], result) def testToFloat16(self): - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16) self.assertEqual([None, None], decode.get_shape().as_list()) @@ -85,7 +85,7 @@ class DecodeRawOpTest(test.TestCase): self.assertAllEqual(expected_result, result) def testEmptyStringInput(self): - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16) @@ -94,7 +94,7 @@ class DecodeRawOpTest(test.TestCase): self.assertEqual((num_inputs, 0), result.shape) def testToUInt16(self): - with self.test_session(): + with self.cached_session(): in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.uint16) self.assertEqual([None, None], decode.get_shape().as_list()) diff --git a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py index d33bf1ba12..affbaf159d 100644 --- a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py +++ b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py @@ -33,7 +33,7 @@ class AssignOpTest(test.TestCase): # contain benign and deliberate data races when multiple threads update # the same parameters without a lock. 
def testParallelUpdateWithoutLocking(self): - with self.test_session() as sess: + with self.cached_session() as sess: ones_t = array_ops.fill([1024, 1024], 1.0) p = variables.Variable(array_ops.zeros([1024, 1024])) adds = [ @@ -60,7 +60,7 @@ class AssignOpTest(test.TestCase): self.assertTrue((vals <= ones * 20).all()) def testParallelAssignWithoutLocking(self): - with self.test_session() as sess: + with self.cached_session() as sess: ones_t = array_ops.fill([1024, 1024], float(1)) p = variables.Variable(array_ops.zeros([1024, 1024])) assigns = [ @@ -92,7 +92,7 @@ class AssignOpTest(test.TestCase): # returning the output tensors. This issue will be resolved with the new # resource variables. def testParallelUpdateWithLocking(self): - with self.test_session() as sess: + with self.cached_session() as sess: zeros_t = array_ops.fill([1024, 1024], 0.0) ones_t = array_ops.fill([1024, 1024], 1.0) p = variables.Variable(zeros_t) @@ -119,7 +119,7 @@ class AssignOpTest(test.TestCase): self.assertAllEqual(vals, ones * 20) def testParallelAssignWithLocking(self): - with self.test_session() as sess: + with self.cached_session() as sess: zeros_t = array_ops.fill([1024, 1024], 0.0) ones_t = array_ops.fill([1024, 1024], 1.0) p = variables.Variable(zeros_t) diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py index 4dda9f093b..06c3271850 100644 --- a/tensorflow/python/kernel_tests/dense_update_ops_test.py +++ b/tensorflow/python/kernel_tests/dense_update_ops_test.py @@ -85,7 +85,7 @@ class AssignOpTest(test.TestCase): self._testTypes(np.arange(0, 20).reshape([4, 5])) def testAssignNonStrictShapeChecking(self): - with self.test_session(): + with self.cached_session(): data = array_ops.fill([1024, 1024], 0) p = variables.Variable([1]) a = state_ops.assign(p, data, validate_shape=False) @@ -99,14 +99,14 @@ class AssignOpTest(test.TestCase): self.assertAllEqual(p.eval(), data2.eval()) def 
testInitRequiredAssignAdd(self): - with self.test_session(): + with self.cached_session(): p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32) a = state_ops.assign_add(p, array_ops.fill([1024, 1024], 0)) with self.assertRaisesOpError("use uninitialized"): a.op.run() def testInitRequiredAssignSub(self): - with self.test_session(): + with self.cached_session(): p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32) a = state_ops.assign_sub(p, array_ops.fill([1024, 1024], 0)) with self.assertRaisesOpError("use uninitialized"): diff --git a/tensorflow/python/kernel_tests/division_future_test.py b/tensorflow/python/kernel_tests/division_future_test.py index e681b32856..e477bdc73b 100644 --- a/tensorflow/python/kernel_tests/division_future_test.py +++ b/tensorflow/python/kernel_tests/division_future_test.py @@ -50,7 +50,7 @@ class DivisionTestCase(test.TestCase): self.assertEqual(x, y) checks.append(f) - with self.test_session() as sess: + with self.cached_session() as sess: for dtype in dtypes: for x in map(dtype, values): for y in map(dtype, values): diff --git a/tensorflow/python/kernel_tests/division_past_test.py b/tensorflow/python/kernel_tests/division_past_test.py index 9ddd62e63c..63951b5b38 100644 --- a/tensorflow/python/kernel_tests/division_past_test.py +++ b/tensorflow/python/kernel_tests/division_past_test.py @@ -49,7 +49,7 @@ class DivisionTestCase(test.TestCase): self.assertEqual(x, y) checks.append(f) - with self.test_session() as sess: + with self.cached_session() as sess: for dtype in dtypes: for x in map(dtype, values): for y in map(dtype, values): diff --git a/tensorflow/python/kernel_tests/duplicate_op_test.py b/tensorflow/python/kernel_tests/duplicate_op_test.py index 529d3dd0b3..654267a582 100644 --- a/tensorflow/python/kernel_tests/duplicate_op_test.py +++ b/tensorflow/python/kernel_tests/duplicate_op_test.py @@ -34,7 +34,7 @@ class DuplicateOpTest(test.TestCase): self.assertEqual(len(duplicate.OP_LIST.op), 0) - 
with self.test_session(): + with self.cached_session(): self.assertEqual(math_ops.add(1, 41).eval(), 42) diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py index 5e8937ad2c..9557e30993 100644 --- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py @@ -288,7 +288,7 @@ class DynamicPartitionTest(test.TestCase): self.assertAllEqual([], partition_vals[i]) def testErrorIndexOutOfRange(self): - with self.test_session() as sess: + with self.cached_session() as sess: data = constant_op.constant([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 13, 14]]) indices = constant_op.constant([0, 2, 99, 2, 2]) @@ -298,7 +298,7 @@ class DynamicPartitionTest(test.TestCase): sess.run(partitions) def testScalarIndexOutOfRange(self): - with self.test_session() as sess: + with self.cached_session() as sess: bad = 17 data = np.zeros(5) partitions = data_flow_ops.dynamic_partition(data, bad, num_partitions=7) @@ -306,7 +306,7 @@ class DynamicPartitionTest(test.TestCase): sess.run(partitions) def testHigherRankIndexOutOfRange(self): - with self.test_session() as sess: + with self.cached_session() as sess: shape = (2, 3) indices = array_ops.placeholder(shape=shape, dtype=np.int32) data = np.zeros(shape + (5,)) @@ -334,7 +334,7 @@ class DynamicPartitionTest(test.TestCase): inds += [13]*194 + [14]*194 + [15]*192 self.assertEqual(len(inds), x.shape[0]) partitioned = data_flow_ops.dynamic_partition(x, inds, 16) - with self.test_session() as sess: + with self.cached_session() as sess: res = sess.run(partitioned) self.assertEqual(res[-1].shape[0], 192) diff --git a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py index 49b9569e2b..3a1036e52a 100644 --- a/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_stitch_op_test.py 
@@ -252,7 +252,7 @@ class ParallelDynamicStitchTest(DynamicStitchTestBase, test.TestCase): # GPU version unit tests def testScalarGPU(self): - with self.test_session(): + with self.cached_session(): indices = [constant_op.constant(0), constant_op.constant(1)] data = [constant_op.constant(40.0), constant_op.constant(60.0)] for step in -1, 1: @@ -263,7 +263,7 @@ class ParallelDynamicStitchTest(DynamicStitchTestBase, test.TestCase): self.assertEqual([2], stitched_t.get_shape().as_list()) def testHigherRankGPU(self): - with self.test_session() as sess: + with self.cached_session() as sess: indices = [ constant_op.constant(6), constant_op.constant([4, 1]), diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py index dcd435e1ff..40b8548cea 100644 --- a/tensorflow/python/kernel_tests/embedding_ops_test.py +++ b/tensorflow/python/kernel_tests/embedding_ops_test.py @@ -242,7 +242,7 @@ class EmbeddingLookupTest(test.TestCase): # vector is going to be empty. The subsequent DivOp fails because of that. # TODO(keveman): Disabling the test until the underlying problem is fixed. 
def testSimpleSharded(self): - with self.test_session(): + with self.cached_session(): num_shards = 2 vocab_size = 4 p, params, feed_dict = _EmbeddingParams(num_shards, vocab_size) @@ -258,7 +258,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testMaxNorm(self): - with self.test_session(): + with self.cached_session(): embeddings = constant_op.constant([[2.0]]) ids = constant_op.constant([0], dtype=dtypes.int32) @@ -268,7 +268,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertAllEqual(embedding.eval(), [[1.0]]) def testMaxNormNontrivial(self): - with self.test_session(): + with self.cached_session(): embeddings = constant_op.constant([[2.0, 4.0], [3.0, 1.0]]) ids = constant_op.constant([0, 1], dtype=dtypes.int32) @@ -281,7 +281,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertAllEqual(embedding.eval(), 2 * normalized.eval()) def testSimpleShardedPartitionedVariable(self): - with self.test_session() as sess: + with self.cached_session() as sess: num_shards = 2 vocab_size = 4 p, p_variable, params, feed_dict = _EmbeddingParamsAsPartitionedVariable( @@ -303,7 +303,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testSimpleShardedPartitionedResourceVariable(self): - with self.test_session() as sess: + with self.cached_session() as sess: num_shards = 2 vocab_size = 4 p, p_variable, params, _ = _EmbeddingParamsAsPartitionedVariable( @@ -326,7 +326,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testShardedModPartitioningInt32Ids(self): - with self.test_session(): + with self.cached_session(): num_shards = 5 vocab_size = 13 # Embedding dimensions is 10. 
The vocab_size x 10 embedding @@ -348,7 +348,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testShardedModPartitioningInt64Ids(self): - with self.test_session(): + with self.cached_session(): num_shards = 5 vocab_size = 13 # Embedding dimensions is 10. The vocab_size x 10 embedding @@ -370,7 +370,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testShardedDivPartitioningInt32Ids(self): - with self.test_session(): + with self.cached_session(): num_shards = 5 vocab_size = 13 # Embedding dimensions is 10. The vocab_size x 10 embedding @@ -394,7 +394,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testShardedDivPartitioningInt32IdsPartitionedVariable(self): - with self.test_session(): + with self.cached_session(): num_shards = 5 vocab_size = 13 # Embedding dimensions is 10. The vocab_size x 10 embedding @@ -419,7 +419,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testShardedDivPartitioningInt64Ids(self): - with self.test_session(): + with self.cached_session(): num_shards = 5 vocab_size = 13 # Embedding dimensions is 10. The vocab_size x 10 embedding @@ -443,7 +443,7 @@ class EmbeddingLookupTest(test.TestCase): self.assertShapeEqual(np_result, embedding) def testShardedDivPartitioningUnknownParamShape(self): - with self.test_session(): + with self.cached_session(): num_shards = 5 vocab_size = 13 # Embedding dimensions is 10. 
The vocab_size x 10 embedding @@ -475,7 +475,7 @@ class EmbeddingLookupTest(test.TestCase): tf_logging.vlog(1, id_vals) for ids_shape in [(10,), (2, 5)]: for num_shards in [1, 3]: - with self.test_session(): + with self.cached_session(): ids = constant_op.constant( id_vals, shape=ids_shape, dtype=dtypes.int32) x, params, _ = _EmbeddingParams(num_shards, vocab_size, shape=[2]) @@ -494,7 +494,7 @@ class EmbeddingLookupTest(test.TestCase): id_vals = list(np.random.randint(vocab_size, size=num_ids)) tf_logging.vlog(1, id_vals) for num_shards in [1, 3]: - with self.test_session(): + with self.cached_session(): ids = constant_op.constant(id_vals, dtype=dtypes.int32) x, params, _ = _EmbeddingParams(num_shards, vocab_size, shape=[2]) # This will force a conversion from IndexedSlices to Tensor. @@ -528,7 +528,7 @@ class EmbeddingLookupTest(test.TestCase): def testHigherRank(self): np.random.seed(8) - with self.test_session(): + with self.cached_session(): for params_shape in (12,), (6, 3): params = np.random.randn(*params_shape) for ids_shape in (3, 2), (4, 3): @@ -548,7 +548,7 @@ class EmbeddingLookupTest(test.TestCase): def testHigherRankMaxNorm(self): np.random.seed(8) - with self.test_session(): + with self.cached_session(): for params_shape in (12,), (6, 3), (6, 2, 3): # Test embedding rank 0, 1, 2. # Note: the first dimension must be a common multiple of procs below. @@ -581,7 +581,7 @@ class EmbeddingLookupTest(test.TestCase): # It always applies max_norm. np.random.seed(8) l2_norm = 2. - with self.test_session(): + with self.cached_session(): # Param values are in [l2_norm, l2_norm+1) so it will always clip. 
params = np.random.rand(6, 3) + l2_norm params_norm = l2_norm * params / np.sqrt( @@ -667,7 +667,7 @@ class EmbeddingLookupSparseTest(test.TestCase): [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64], [True, False]): - with self.test_session(): + with self.cached_session(): p, params, feed_dict = _EmbeddingParams( num_shards, vocab_size, shape=param_shape, dtype=dtype) embedding_sum = embedding_ops.embedding_lookup_sparse( @@ -716,7 +716,7 @@ class EmbeddingLookupSparseTest(test.TestCase): for num_shards, combiner, dtype, ignore_weights in itertools.product( [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32, dtypes.float64], [True, False]): - with self.test_session(): + with self.cached_session(): x, params, _ = _EmbeddingParams( num_shards, vocab_size, shape=param_shape, dtype=dtype) @@ -734,7 +734,7 @@ class EmbeddingLookupSparseTest(test.TestCase): self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3) def testIncompatibleShapes(self): - with self.test_session(): + with self.cached_session(): x, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32) sp_ids = sparse_tensor.SparseTensor( constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64), @@ -819,7 +819,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): return sparse_ids, sparse_weights def test_safe_embedding_lookup_sparse_return_zero_vector(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights() sparse_ids, sparse_weights = self._ids_and_weights_2d() @@ -832,7 +832,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) def test_safe_embedding_lookup_sparse_return_special_vector(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights() sparse_ids, sparse_weights = self._ids_and_weights_2d() @@ -846,7 +846,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): embedding_weights[0][2], embedding_weights[0][3]]) def 
test_safe_embedding_lookup_sparse_no_weights(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights() sparse_ids, _ = self._ids_and_weights_2d() @@ -860,7 +860,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) def test_safe_embedding_lookup_sparse_partitioned(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights(num_shards=3) sparse_ids, _ = self._ids_and_weights_2d() @@ -874,7 +874,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): (embedding_weights[0] + embedding_weights[1]) / 2.0]) def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights(num_shards=3) sparse_ids, sparse_weights = self._ids_and_weights_2d() @@ -889,7 +889,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): embedding_weights, sparse_ids, sparse_weights) def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights() sparse_ids, sparse_weights = self._ids_and_weights_3d() @@ -902,7 +902,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights() sparse_ids, sparse_weights = self._ids_and_weights_3d() @@ -918,7 +918,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): ]]) def test_safe_embedding_lookup_sparse_3d_no_weights(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights() sparse_ids, _ = self._ids_and_weights_3d() @@ -934,7 +934,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): ]]) def 
test_safe_embedding_lookup_sparse_3d_partitioned(self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights(num_shards=3) sparse_ids, _ = self._ids_and_weights_3d() @@ -951,7 +951,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( self): - with self.test_session(): + with self.cached_session(): embedding_weights = self._random_weights(num_shards=3) sparse_ids, sparse_weights = self._ids_and_weights_3d() @@ -1035,7 +1035,7 @@ class DynamicStitchOpTest(test.TestCase): # We expect that the values are merged in order. def testStitchOrder(self): - with self.test_session(): + with self.cached_session(): indices = [] np_values = [] values = [] diff --git a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py index e1f5a6b620..7d9d4e5175 100644 --- a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py +++ b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py @@ -83,7 +83,7 @@ class ExtractImagePatchesGradTest(test.TestCase): random_seed = 42 random_seed_lib.set_random_seed(random_seed) - with self.test_session(): + with self.cached_session(): for test_case in self._TEST_CASES: np.random.seed(random_seed) in_shape = test_case['in_shape'] diff --git a/tensorflow/python/kernel_tests/fft_ops_test.py b/tensorflow/python/kernel_tests/fft_ops_test.py index 629acedda5..f117934e4b 100644 --- a/tensorflow/python/kernel_tests/fft_ops_test.py +++ b/tensorflow/python/kernel_tests/fft_ops_test.py @@ -496,7 +496,7 @@ class RFFTOpsTest(BaseFFTOpsTest): "Input dimension .* must have length of at least 6 but got: 5"): x = np.zeros((5,) * rank).astype(np.float32) fft_length = [6] * rank - with self.test_session(): + with self.cached_session(): rfft_fn(x, fft_length).eval() with self.assertRaisesWithPredicateMatch( @@ -504,7 +504,7 @@ class 
RFFTOpsTest(BaseFFTOpsTest): "Input dimension .* must have length of at least .* but got: 3"): x = np.zeros((3,) * rank).astype(np.complex64) fft_length = [6] * rank - with self.test_session(): + with self.cached_session(): irfft_fn(x, fft_length).eval() def testGrad_Simple(self): diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py index 9e7b528338..a5f8f64e0c 100644 --- a/tensorflow/python/kernel_tests/fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/fifo_queue_test.py @@ -99,19 +99,19 @@ class FIFOQueueTest(test.TestCase): """, q.queue_ref.op.node_def) def testEnqueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) enqueue_op = q.enqueue((10.0,)) enqueue_op.run() def testEnqueueHalf(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float16) enqueue_op = q.enqueue((10.0,)) enqueue_op.run() def testEnqueueWithShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shapes=(3, 2)) enqueue_correct_op = q.enqueue(([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],)) enqueue_correct_op.run() @@ -120,7 +120,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(1, q.size().eval()) def testEnqueueManyWithShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue( 10, [dtypes_lib.int32, dtypes_lib.int32], shapes=[(), (2,)]) q.enqueue_many([[1, 2, 3, 4], [[1, 1], [2, 2], [3, 3], [4, 4]]]).run() @@ -143,7 +143,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(self.evaluate(q.dequeue()), 1) def testEnqueueDictWithoutNames(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) with self.assertRaisesRegexp(ValueError, "must have names"): q.enqueue({"a": 12.0}) @@ -151,7 +151,7 @@ class FIFOQueueTest(test.TestCase): 
q.enqueue_many({"a": [12.0, 13.0]}) def testParallelEnqueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -177,7 +177,7 @@ class FIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, results) def testParallelDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -201,7 +201,7 @@ class FIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, results) def testDequeue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -215,7 +215,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([elems[i]], vals) def testDequeueHalf(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float16) elems = [10.0, 20.0, 30.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -229,7 +229,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([elems[i]], vals) def testEnqueueAndBlockingDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(3, dtypes_lib.float32) elems = [10.0, 20.0, 30.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -259,7 +259,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([elem], result) def testMultiEnqueueAndDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32)) elems = [(5, 10.0), (10, 20.0), (15, 30.0)] enqueue_ops = [q.enqueue((x, y)) for x, y in elems] @@ -275,12 +275,12 
@@ class FIFOQueueTest(test.TestCase): self.assertEqual([y], y_val) def testQueueSizeEmpty(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) self.assertEqual([0], q.size().eval()) def testQueueSizeAfterEnqueueAndDequeue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) enqueue_op = q.enqueue((10.0,)) dequeued_t = q.dequeue() @@ -293,7 +293,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(0, size.eval()) def testEnqueueMany(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -306,7 +306,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([elems[i % 4]], vals) def testEmptyEnqueueMany(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) empty_t = constant_op.constant( [], dtype=dtypes_lib.float32, shape=[0, 2, 3]) @@ -318,7 +318,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([0], size_t.eval()) def testEmptyDequeueMany(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shapes=()) enqueue_op = q.enqueue((10.0,)) dequeued_t = q.dequeue_many(0) @@ -328,7 +328,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([], dequeued_t.eval().tolist()) def testEmptyDequeueUpTo(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shapes=()) enqueue_op = q.enqueue((10.0,)) dequeued_t = q.dequeue_up_to(0) @@ -338,14 +338,14 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([], dequeued_t.eval().tolist()) def testEmptyDequeueManyWithNoShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) # Expect the operation to fail due 
to the shape not being constrained. with self.assertRaisesOpError("specified shapes"): q.dequeue_many(0).eval() def testMultiEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, (dtypes_lib.float32, dtypes_lib.int32)) float_elems = [10.0, 20.0, 30.0, 40.0] int_elems = [[1, 2], [3, 4], [5, 6], [7, 8]] @@ -361,7 +361,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(int_elems[i % 4], int_val) def testDequeueMany(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_op = q.enqueue_many((elems,)) @@ -373,7 +373,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(elems[4:8], dequeued_t.eval()) def testDequeueUpToNoBlocking(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_op = q.enqueue_many((elems,)) @@ -385,7 +385,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(elems[4:8], dequeued_t.eval()) def testMultiDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue( 10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (2,))) float_elems = [ @@ -416,7 +416,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape()) def testMultiDequeueUpToNoBlocking(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue( 10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (2,))) float_elems = [ @@ -440,7 +440,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(int_elems[4:8], int_val) def testHighDimension(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.int32, 
(4, 4, 4, 4)) elems = np.array([[[[[x] * 4] * 4] * 4] * 4 for x in range(10)], np.int32) enqueue_op = q.enqueue_many((elems,)) @@ -494,7 +494,7 @@ class FIFOQueueTest(test.TestCase): array_ops.placeholder(dtypes_lib.int32))) def testEnqueueWrongShapeAtRuntime(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.int32), ( (2, 2), (3, 3))) elems_ok = np.array([1] * 4).reshape((2, 2)).astype(np.int32) @@ -506,7 +506,7 @@ class FIFOQueueTest(test.TestCase): feed_dict={elems_bad: np.array([1] * 12).reshape((3, 4))}) def testEnqueueDequeueManyWrongShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.int32), ( (2, 2), (3, 3))) elems_ok = np.array([1] * 8).reshape((2, 2, 2)).astype(np.int32) @@ -521,7 +521,7 @@ class FIFOQueueTest(test.TestCase): dequeued_t.eval() def testParallelEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(1000, dtypes_lib.float32, shapes=()) elems = [10.0 * x for x in range(100)] enqueue_op = q.enqueue_many((elems,)) @@ -540,7 +540,7 @@ class FIFOQueueTest(test.TestCase): self.assertItemsEqual(dequeued_t.eval(), elems * 10) def testParallelDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(1000, dtypes_lib.float32, shapes=()) elems = [10.0 * x for x in range(1000)] enqueue_op = q.enqueue_many((elems,)) @@ -562,7 +562,7 @@ class FIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, dequeued_elems) def testParallelDequeueUpTo(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(1000, dtypes_lib.float32, shapes=()) elems = [10.0 * x for x in range(1000)] enqueue_op = q.enqueue_many((elems,)) @@ -586,7 +586,7 @@ class FIFOQueueTest(test.TestCase): 
self.assertItemsEqual(elems, dequeued_elems) def testParallelEnqueueAndDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(50, dtypes_lib.float32, shapes=()) initial_elements = [10.0] * 49 q.enqueue_many((initial_elements,)).run() @@ -619,7 +619,7 @@ class FIFOQueueTest(test.TestCase): self.assertTrue(elem in (10.0, 20.0)) def testMixtureOfEnqueueAndEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.int32, shapes=()) enqueue_placeholder = array_ops.placeholder(dtypes_lib.int32, shape=()) enqueue_op = q.enqueue((enqueue_placeholder,)) @@ -655,7 +655,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testMixtureOfDequeueAndDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.int32, shapes=()) enqueue_op = q.enqueue_many((np.arange(250, dtype=np.int32),)) dequeued_t = q.dequeue() @@ -689,7 +689,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testBlockingDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -716,7 +716,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(elems, dequeued_elems) def testBlockingDequeueUpTo(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -743,7 +743,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(elems, dequeued_elems) def testDequeueManyWithTensorParameter(self): - with self.test_session(): + with self.cached_session(): # Define a first queue that contains integer counts. 
dequeue_counts = [random.randint(1, 10) for _ in range(100)] count_q = data_flow_ops.FIFOQueue(100, dtypes_lib.int32, ()) @@ -768,7 +768,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(elems, dequeued_elems) def testDequeueFromClosedQueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -786,7 +786,7 @@ class FIFOQueueTest(test.TestCase): dequeued_t.eval() def testBlockingDequeueFromClosedQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -812,7 +812,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueFromClosedEmptyQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) close_op = q.close() dequeued_t = q.dequeue() @@ -832,7 +832,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueManyFromClosedQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -857,7 +857,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueManyButNotAllFromClosedQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -882,7 +882,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testDequeueUpToFromClosedQueueReturnsRemainder(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) elems = 
[10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -904,7 +904,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testEnqueueManyLargerThanCapacityWithConcurrentDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32, ()) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -941,7 +941,7 @@ class FIFOQueueTest(test.TestCase): close_thread.join() def testClosedBlockingDequeueManyRestoresPartialBatch(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(4, (dtypes_lib.float32, dtypes_lib.float32), ( (), ())) elems_a = [1.0, 2.0, 3.0] @@ -974,7 +974,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testBlockingDequeueManyFromClosedEmptyQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) close_op = q.close() dequeued_t = q.dequeue_many(4) @@ -994,7 +994,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueUpToFromClosedEmptyQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, ()) close_op = q.close() dequeued_t = q.dequeue_up_to(4) @@ -1014,7 +1014,7 @@ class FIFOQueueTest(test.TestCase): dequeue_thread.join() def testEnqueueToClosedQueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) enqueue_op = q.enqueue((10.0,)) close_op = q.close() @@ -1027,7 +1027,7 @@ class FIFOQueueTest(test.TestCase): enqueue_op.run() def testEnqueueManyToClosedQueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1041,7 +1041,7 @@ class 
FIFOQueueTest(test.TestCase): enqueue_op.run() def testBlockingEnqueueToFullQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1064,7 +1064,7 @@ class FIFOQueueTest(test.TestCase): thread.join() def testBlockingEnqueueManyToFullQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1091,7 +1091,7 @@ class FIFOQueueTest(test.TestCase): thread.join() def testBlockingEnqueueBeforeClose(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1128,7 +1128,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testBlockingEnqueueManyBeforeClose(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32) elems = [10.0, 20.0, 30.0] enqueue_op = q.enqueue_many((elems,)) @@ -1161,7 +1161,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(elem, dequeued_t.eval()) def testDoesNotLoseValue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.FIFOQueue(1, dtypes_lib.float32) enqueue_op = q.enqueue((10.0,)) size_t = q.size() @@ -1171,7 +1171,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(size_t.eval(), [1]) def testSharedQueueSameSession(self): - with self.test_session(): + with self.cached_session(): q1 = data_flow_ops.FIFOQueue( 1, dtypes_lib.float32, shared_name="shared_queue") q1.enqueue((10.0,)).run() @@ -1201,7 +1201,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(q2_size_t.eval(), [0]) def testIncompatibleSharedQueueErrors(self): - with self.test_session(): 
+ with self.cached_session(): q_a_1 = data_flow_ops.FIFOQueue(10, dtypes_lib.float32, shared_name="q_a") q_a_2 = data_flow_ops.FIFOQueue(15, dtypes_lib.float32, shared_name="q_a") q_a_1.queue_ref.op.run() @@ -1244,7 +1244,7 @@ class FIFOQueueTest(test.TestCase): q_f_2.queue_ref.op.run() def testSelectQueue(self): - with self.test_session(): + with self.cached_session(): num_queues = 10 qlist = list() for _ in xrange(num_queues): @@ -1257,7 +1257,7 @@ class FIFOQueueTest(test.TestCase): self.assertEqual(q.dequeue().eval(), 10.0) def testSelectQueueOutOfRange(self): - with self.test_session(): + with self.cached_session(): q1 = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) q2 = data_flow_ops.FIFOQueue(15, dtypes_lib.float32) enq_q = data_flow_ops.FIFOQueue.from_list(3, [q1, q2]) @@ -1281,7 +1281,7 @@ class FIFOQueueTest(test.TestCase): sess.run(enqueue_many_op) def testResetOfBlockingOperation(self): - with self.test_session() as sess: + with self.cached_session() as sess: q_empty = data_flow_ops.FIFOQueue(5, dtypes_lib.float32, ()) dequeue_op = q_empty.dequeue() dequeue_many_op = q_empty.dequeue_many(1) @@ -1309,7 +1309,7 @@ class FIFOQueueTest(test.TestCase): t.join() def testBigEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(5, dtypes_lib.int32, ((),)) elem = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] enq = q.enqueue_many((elem,)) @@ -1354,7 +1354,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(elem, results) def testBigDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(2, dtypes_lib.int32, ((),)) elem = np.arange(4, dtype=np.int32) enq_list = [q.enqueue((e,)) for e in elem] @@ -1380,7 +1380,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(elem, results) def testDtypes(self): - with self.test_session() as sess: + with self.cached_session() as sess: dtypes = [ dtypes_lib.float32, dtypes_lib.float64, 
dtypes_lib.int32, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, dtypes_lib.int64, @@ -1411,7 +1411,7 @@ class FIFOQueueTest(test.TestCase): self.assertAllEqual(input_elem, output_elem) def testDequeueEnqueueFail(self): - with self.test_session() as session: + with self.cached_session() as session: q = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()]) a = q.dequeue() b = control_flow_ops.Assert(False, ["Before enqueue"]) @@ -1474,7 +1474,7 @@ class FIFOQueueDictTest(test.TestCase): self.assertEqual(["i", "f"], q.names) def testEnqueueDequeueOneComponent(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue( 10, dtypes_lib.float32, shapes=((),), names="f") # Verify that enqueue() checks that when using names we must enqueue a @@ -1519,7 +1519,7 @@ class FIFOQueueDictTest(test.TestCase): self.assertEqual([40.0, 50.0], list(f)) def testEnqueueDequeueMultipleComponent(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue( 10, (dtypes_lib.float32, dtypes_lib.int32, dtypes_lib.string), shapes=((), (), ()), @@ -1600,7 +1600,7 @@ class FIFOQueueWithTimeoutTest(test.TestCase): sess.run(dequeued_t) def testReusableAfterTimeout(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32) dequeued_t = q.dequeue() enqueue_op = q.enqueue(37) diff --git a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py index faac7d8365..f89d2062f1 100644 --- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py +++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py @@ -127,7 +127,7 @@ class FractionalAvgTest(test.TestCase): Returns: None """ - with self.test_session() as sess: + with self.cached_session() as sess: p, r, c = nn_ops.fractional_avg_pool( input_tensor, pooling_ratio, @@ -160,7 +160,7 @@ 
class FractionalAvgTest(test.TestCase): overlapping)) rand_mat = self._PRNG.randint(10, size=tensor_shape) pooling_ratio = [1, math.sqrt(2), math.sqrt(2), 1] - with self.test_session() as sess: + with self.cached_session() as sess: p, r, c = nn_ops.fractional_avg_pool( rand_mat.astype(np.float32), pooling_ratio, @@ -234,7 +234,7 @@ class FractionalAvgTest(test.TestCase): [4, 4, 5, 9, 7, 2] ]) # pyformat: enable - with self.test_session() as sess: + with self.cached_session() as sess: # Since deterministic = True, seed and seed2 are fixed. Therefore r, and c # are the same each time. We can have an expected result precomputed. # r = [0, 2, 4, 6] @@ -314,7 +314,7 @@ class FractionalAvgTest(test.TestCase): def testDifferentInputTensorShape(self): """Runs the operation in one session with different input tensor shapes.""" - with self.test_session() as sess: + with self.cached_session() as sess: input_holder = array_ops.placeholder(dtypes.float32, [None, None, None, 3]) pooling_ratio = [1, 1.5, 1.5, 1] @@ -389,7 +389,7 @@ class FractionalAvgPoolGradTest(test.TestCase): num_cols = col_window_size * 7 for num_channels in [1, 2]: input_shape = (num_batches, num_rows, num_cols, num_channels) - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant( self._GenerateRandomInputTensor(input_shape).astype( np.float32)) @@ -428,7 +428,7 @@ class FractionalAvgPoolGradTest(test.TestCase): num_cols = (col_window_size - 1) * 7 + 1 for num_channels in [1, 2]: input_shape = (num_batches, num_rows, num_cols, num_channels) - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant( self._GenerateRandomInputTensor(input_shape).astype( np.float32)) @@ -468,7 +468,7 @@ class FractionalAvgPoolGradTest(test.TestCase): for pseudo_random in True, False: for overlapping in True, False: - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant(input_data, 
shape=input_shape) output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool( input_tensor, @@ -501,7 +501,7 @@ class FractionalAvgPoolGradTest(test.TestCase): for num_channels in [1, 3]: input_shape = (num_batches, num_rows, num_cols, num_channels) input_data = self._GenerateRandomInputTensor(input_shape) - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant(input_data, shape=input_shape) output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool( input_tensor, @@ -532,7 +532,7 @@ class FractionalAvgPoolGradTest(test.TestCase): overlapping = True pseudo_random = False - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant(input_data, shape=input_shape) output_tensor, unused_a, unused_b = nn_ops.fractional_avg_pool( input_tensor, diff --git a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py index 6477c9ebc4..9b94ca8554 100644 --- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py +++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py @@ -127,7 +127,7 @@ class FractionalMaxPoolTest(test.TestCase): Returns: None """ - with self.test_session() as sess: + with self.cached_session() as sess: p, r, c = nn_ops.fractional_max_pool( input_tensor, pooling_ratio, @@ -160,7 +160,7 @@ class FractionalMaxPoolTest(test.TestCase): overlapping)) rand_mat = self._PRNG.randint(10, size=tensor_shape) pooling_ratio = [1, math.sqrt(2), math.sqrt(2), 1] - with self.test_session() as sess: + with self.cached_session() as sess: p, r, c = nn_ops.fractional_max_pool( rand_mat, pooling_ratio, @@ -285,7 +285,7 @@ class FractionalMaxPoolTest(test.TestCase): def testDifferentInputTensorShape(self): """Runs the operation in one session with different input tensor shapes.""" - with self.test_session() as sess: + with self.cached_session() as sess: input_holder = 
array_ops.placeholder(dtypes.float32, [None, None, None, 3]) pooling_ratio = [1, 1.5, 1.5, 1] @@ -374,7 +374,7 @@ class FractionalMaxPoolGradTest(test.TestCase): num_cols = col_window_size * 7 for num_channels in [1, 2]: input_shape = (num_batches, num_rows, num_cols, num_channels) - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant( self._GenerateUniqueRandomInputTensor(input_shape)) window_size = [1, row_window_size, col_window_size, 1] @@ -409,7 +409,7 @@ class FractionalMaxPoolGradTest(test.TestCase): num_cols = (col_window_size - 1) * 7 + 1 for num_channels in [1, 2]: input_shape = (num_batches, num_rows, num_cols, num_channels) - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant( self._GenerateUniqueRandomInputTensor(input_shape)) window_size = [1, row_window_size, col_window_size, 1] @@ -447,7 +447,7 @@ class FractionalMaxPoolGradTest(test.TestCase): for pseudo_random in True, False: for overlapping in True, False: - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant(input_data, shape=input_shape) output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool( input_tensor, @@ -482,7 +482,7 @@ class FractionalMaxPoolGradTest(test.TestCase): input_data = self._GenerateUniqueRandomInputTensor(input_shape) # Add some randomness to make input_data not so 'integer' input_data += self._PRNG.random_sample(input_shape) - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant(input_data, shape=input_shape) output_tensor, unused_a, unused_b = nn_ops.fractional_max_pool( input_tensor, @@ -515,7 +515,7 @@ class FractionalMaxPoolGradTest(test.TestCase): overlapping = True pseudo_random = False - with self.test_session() as _: + with self.cached_session() as _: input_tensor = constant_op.constant(input_data, shape=input_shape) output_tensor, unused_a, unused_b = 
nn_ops.fractional_max_pool( input_tensor, @@ -579,7 +579,7 @@ class FractionalMaxPoolGradTest(test.TestCase): 0.0, 0.0, 0.0, 0.0, 6.0, 0.0, 21.0, 0.0], input_size) # pyformat: disable - with self.test_session() as _: + with self.cached_session() as _: # Test when overlapping is False input_tensor = constant_op.constant(input_data, shape=input_size) output_tensor = constant_op.constant( diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/gather_op_test.py index 033fa95935..85bf969068 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/gather_op_test.py @@ -147,7 +147,7 @@ class GatherTest(test.TestCase): def testString(self): params = np.array([[b"asdf", b"zxcv"], [b"qwer", b"uiop"]]) - with self.test_session(): + with self.cached_session(): self.assertAllEqual([b"qwer", b"uiop"], array_ops.gather(params, 1, axis=0).eval()) self.assertAllEqual([b"asdf", b"qwer"], @@ -157,7 +157,7 @@ class GatherTest(test.TestCase): for unsigned_type in (dtypes.uint32, dtypes.uint64): params = self._buildParams( np.array([[1, 2, 3], [7, 8, 9]]), unsigned_type) - with self.test_session(): + with self.cached_session(): self.assertAllEqual([7, 8, 9], array_ops.gather(params, 1, axis=0).eval()) self.assertAllEqual([1, 7], array_ops.gather(params, 0, axis=1).eval()) diff --git a/tensorflow/python/kernel_tests/gradient_correctness_test.py b/tensorflow/python/kernel_tests/gradient_correctness_test.py index e93c6235f7..291a69ebac 100644 --- a/tensorflow/python/kernel_tests/gradient_correctness_test.py +++ b/tensorflow/python/kernel_tests/gradient_correctness_test.py @@ -30,7 +30,7 @@ from tensorflow.python.platform import test class GradientCorrectnessTest(test.TestCase): def testMultipleOutputChainedGradients(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = constant_op.constant(1.0, dtype=dtypes.float32) yexp = math_ops.exp(x) yexplog = math_ops.log(yexp) @@ -43,13 
+43,13 @@ class GradientCorrectnessTest(test.TestCase): def testIdentityGradient(self): x = constant_op.constant(3.) dx_dx, = gradients_impl.gradients(x, x) - with self.test_session() as sess: + with self.cached_session() as sess: self.assertAllClose(1., sess.run(dx_dx)) def testIntegerIdentityGradient(self): x = constant_op.constant(3) dx_dx, = gradients_impl.gradients(x, x) - with self.test_session() as sess: + with self.cached_session() as sess: self.assertAllClose(1, sess.run(dx_dx)) def testGradientWithIntegerPath(self): @@ -57,7 +57,7 @@ class GradientCorrectnessTest(test.TestCase): k = math_ops.to_float(math_ops.to_int32(x)) y = x * k dy_dx, = gradients_impl.gradients(y, x) - with self.test_session() as sess: + with self.cached_session() as sess: self.assertAllClose([3., 4.], sess.run(dy_dx)) def testNoIntegerGradient1(self): diff --git a/tensorflow/python/kernel_tests/identity_n_op_py_test.py b/tensorflow/python/kernel_tests/identity_n_op_py_test.py index 408b173981..518733cd8e 100644 --- a/tensorflow/python/kernel_tests/identity_n_op_py_test.py +++ b/tensorflow/python/kernel_tests/identity_n_op_py_test.py @@ -28,7 +28,7 @@ from tensorflow.python.platform import test class IdentityNOpTest(test.TestCase): def testInt32String_6(self): - with self.test_session() as sess: + with self.cached_session() as sess: [value0, value1] = sess.run( array_ops.identity_n([[1, 2, 3, 4, 5, 6], [b"a", b"b", b"C", b"d", b"E", b"f", b"g"]])) @@ -37,7 +37,7 @@ class IdentityNOpTest(test.TestCase): np.array([b"a", b"b", b"C", b"d", b"E", b"f", b"g"]), value1) def testInt32_shapes(self): - with self.test_session() as sess: + with self.cached_session() as sess: inp0 = constant_op.constant([10, 20, 30, 40, 50, 60], shape=[2, 3]) inp1 = constant_op.constant([11, 21, 31, 41, 51, 61], shape=[3, 2]) inp2 = constant_op.constant( @@ -52,12 +52,12 @@ class IdentityNOpTest(test.TestCase): def testString(self): source = [b"A", b"b", b"C", b"d", b"E", b"f"] - with self.test_session() as sess: 
+ with self.cached_session() as sess: [value] = sess.run(array_ops.identity_n([source])) self.assertAllEqual(source, value) def testIdentityShape(self): - with self.test_session(): + with self.cached_session(): shape = [2, 3] array_2x3 = [[1, 2, 3], [6, 5, 4]] tensor = constant_op.constant(array_2x3) diff --git a/tensorflow/python/kernel_tests/identity_op_py_test.py b/tensorflow/python/kernel_tests/identity_op_py_test.py index 49fb76d5b4..37f9f716f8 100644 --- a/tensorflow/python/kernel_tests/identity_op_py_test.py +++ b/tensorflow/python/kernel_tests/identity_op_py_test.py @@ -31,24 +31,24 @@ from tensorflow.python.platform import test class IdentityOpTest(test.TestCase): def testInt32_6(self): - with self.test_session(): + with self.cached_session(): value = array_ops.identity([1, 2, 3, 4, 5, 6]).eval() self.assertAllEqual(np.array([1, 2, 3, 4, 5, 6]), value) def testInt32_2_3(self): - with self.test_session(): + with self.cached_session(): inp = constant_op.constant([10, 20, 30, 40, 50, 60], shape=[2, 3]) value = array_ops.identity(inp).eval() self.assertAllEqual(np.array([[10, 20, 30], [40, 50, 60]]), value) def testString(self): source = [b"A", b"b", b"C", b"d", b"E", b"f"] - with self.test_session(): + with self.cached_session(): value = array_ops.identity(source).eval() self.assertAllEqual(source, value) def testIdentityShape(self): - with self.test_session(): + with self.cached_session(): shape = [2, 3] array_2x3 = [[1, 2, 3], [6, 5, 4]] tensor = constant_op.constant(array_2x3) @@ -59,7 +59,7 @@ class IdentityOpTest(test.TestCase): array_ops.identity(np.array(array_2x3)).get_shape()) def testRefIdentityShape(self): - with self.test_session(): + with self.cached_session(): shape = [2, 3] tensor = variables.Variable( constant_op.constant( diff --git a/tensorflow/python/kernel_tests/in_topk_op_test.py b/tensorflow/python/kernel_tests/in_topk_op_test.py index fafeea8ec0..6fdb497bc6 100644 --- a/tensorflow/python/kernel_tests/in_topk_op_test.py +++ 
b/tensorflow/python/kernel_tests/in_topk_op_test.py @@ -30,7 +30,7 @@ class InTopKTest(test.TestCase): def _validateInTopK(self, predictions, target, k, expected): np_ans = np.array(expected) - with self.test_session(): + with self.cached_session(): precision = nn_ops.in_top_k(predictions, target, k) out = precision.eval() self.assertAllClose(np_ans, out) @@ -65,7 +65,7 @@ class InTopKTest(test.TestCase): def testBadTarget(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] target = [0, 80000] - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "target.*out of range"): nn_ops.in_top_k(predictions, target, 2).eval() @@ -75,7 +75,7 @@ class InTopKTest(test.TestCase): target = [0, 2] k = constant_op.constant(3) np_ans = np.array([False, True]) - with self.test_session(): + with self.cached_session(): precision = nn_ops.in_top_k(predictions, target, k) out = precision.eval() self.assertAllClose(np_ans, out) diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index f6097ad489..79ce965242 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -343,7 +343,7 @@ class UniformUnitScalingInitializationTest(test.TestCase): def testZeroSize(self): shape = [0, 2] - with self.test_session(): + with self.cached_session(): x = variable_scope.get_variable( "x", shape=shape, diff --git a/tensorflow/python/kernel_tests/inplace_ops_test.py b/tensorflow/python/kernel_tests/inplace_ops_test.py index 6e894365af..90759c23ae 100644 --- a/tensorflow/python/kernel_tests/inplace_ops_test.py +++ b/tensorflow/python/kernel_tests/inplace_ops_test.py @@ -153,7 +153,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase): self.assertAllClose(vy, vz) def testError(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors.InvalidArgumentError, "must be a 
vector"): _ = inplace_ops.inplace_update([[1.]], [[0]], [[10]]).eval() diff --git a/tensorflow/python/kernel_tests/io_ops_test.py b/tensorflow/python/kernel_tests/io_ops_test.py index 61944f7e31..afa24195cb 100644 --- a/tensorflow/python/kernel_tests/io_ops_test.py +++ b/tensorflow/python/kernel_tests/io_ops_test.py @@ -37,7 +37,7 @@ class IoOpsTest(test.TestCase): with tempfile.NamedTemporaryFile( prefix='ReadFileTest', dir=self.get_temp_dir(), delete=False) as temp: temp.write(contents) - with self.test_session(): + with self.cached_session(): read = io_ops.read_file(temp.name) self.assertEqual([], read.get_shape()) self.assertEqual(read.eval(), contents) @@ -51,7 +51,7 @@ class IoOpsTest(test.TestCase): prefix='WriteFileTest', dir=self.get_temp_dir(), delete=False) as temp: pass - with self.test_session() as sess: + with self.cached_session() as sess: w = io_ops.write_file(temp.name, contents) sess.run(w) with open(temp.name, 'rb') as f: @@ -65,7 +65,7 @@ class IoOpsTest(test.TestCase): contents = compat.as_bytes(contents) subdir = os.path.join(self.get_temp_dir(), 'subdir1') filepath = os.path.join(subdir, 'subdir2', 'filename') - with self.test_session() as sess: + with self.cached_session() as sess: w = io_ops.write_file(filepath, contents) sess.run(w) with open(filepath, 'rb') as f: @@ -88,7 +88,7 @@ class IoOpsTest(test.TestCase): prefix=c, dir=self.get_temp_dir(), delete=True) for c in cases ] - with self.test_session(): + with self.cached_session(): # Test exact match without wildcards. 
for f in files: self.assertEqual( diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py index 0e4e58409e..cd6a34d657 100644 --- a/tensorflow/python/kernel_tests/linalg_grad_test.py +++ b/tensorflow/python/kernel_tests/linalg_grad_test.py @@ -40,7 +40,7 @@ def _AddTest(test, op_name, testcase_name, fn): class ShapeTest(test_lib.TestCase): def testBatchGradientUnknownSize(self): - with self.test_session(): + with self.cached_session(): batch_size = constant_op.constant(3) matrix_size = constant_op.constant(4) batch_identity = array_ops.tile( diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index 2f28d37eff..aa17f727d0 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -128,7 +128,7 @@ class AdjointTest(test.TestCase): matrix_np = np.array([[1 + 1j, 2 + 2j, 3 + 3j], [4 + 4j, 5 + 5j, 6 + 6j]]).astype(dtype) expected_transposed = np.conj(matrix_np.T) - with self.test_session(): + with self.cached_session(): matrix = ops.convert_to_tensor(matrix_np) transposed = linalg.adjoint(matrix) self.assertEqual((3, 2), transposed.get_shape()) diff --git a/tensorflow/python/kernel_tests/listdiff_op_test.py b/tensorflow/python/kernel_tests/listdiff_op_test.py index ee86cf0b24..baeb40dd63 100644 --- a/tensorflow/python/kernel_tests/listdiff_op_test.py +++ b/tensorflow/python/kernel_tests/listdiff_op_test.py @@ -42,7 +42,7 @@ class ListDiffTest(test.TestCase): out = [compat.as_bytes(str(a)) for a in out] for diff_func in [array_ops.setdiff1d]: for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session() as sess: + with self.cached_session() as sess: x_tensor = ops.convert_to_tensor(x, dtype=dtype) y_tensor = ops.convert_to_tensor(y, dtype=dtype) out_tensor, idx_tensor = diff_func(x_tensor, y_tensor, diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py 
b/tensorflow/python/kernel_tests/logging_ops_test.py index e635a71c78..82729b9e27 100644 --- a/tensorflow/python/kernel_tests/logging_ops_test.py +++ b/tensorflow/python/kernel_tests/logging_ops_test.py @@ -31,7 +31,7 @@ from tensorflow.python.platform import test class LoggingOpsTest(test.TestCase): def testAssertDivideByZero(self): - with self.test_session() as sess: + with self.cached_session() as sess: epsilon = ops.convert_to_tensor(1e-20) x = ops.convert_to_tensor(0.0) y = ops.convert_to_tensor(1.0) @@ -66,7 +66,7 @@ class PrintGradientTest(test.TestCase): self.assertEqual(inp.get_shape(), inp_printed.get_shape()) def testPrintGradient(self): - with self.test_session(): + with self.cached_session(): inp = constant_op.constant(2.0, shape=[100, 32], name="in") w = constant_op.constant(4.0, shape=[10, 100], name="w") wx = math_ops.matmul(w, inp, name="wx") diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 5f08339fe5..38b14e34cc 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -36,7 +36,7 @@ from tensorflow.python.training import server_lib class HashTableOpTest(test.TestCase): def testHashTable(self): - with self.test_session(): + with self.cached_session(): default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -54,7 +54,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual([0, 1, -1], result) def testHashTableFindHighRank(self): - with self.test_session(): + with self.cached_session(): default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -72,7 +72,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual([[0, 1], [-1, -1]], result) def testHashTableInitWithPythonArrays(self): - with self.test_session(): + with self.cached_session(): default_val = 
-1 keys = ["brain", "salad", "surgery"] values = [0, 1, 2] @@ -90,7 +90,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual([0, 1, -1], result) def testHashTableInitWithNumPyArrays(self): - with self.test_session(): + with self.cached_session(): default_val = -1 keys = np.array(["brain", "salad", "surgery"], dtype=np.str) values = np.array([0, 1, 2], dtype=np.int64) @@ -107,7 +107,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual([0, 1, -1], result) def testMultipleHashTables(self): - with self.test_session() as sess: + with self.cached_session() as sess: default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -135,7 +135,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual([0, 1, -1], out3) def testHashTableWithTensorDefault(self): - with self.test_session(): + with self.cached_session(): default_val = constant_op.constant(-1, dtypes.int64) keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -150,7 +150,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual([0, 1, -1], result) def testHashTableWithSparseTensorInput(self): - with self.test_session() as sess: + with self.cached_session() as sess: default_val = constant_op.constant(-1, dtypes.int64) keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -173,7 +173,7 @@ class HashTableOpTest(test.TestCase): self.assertAllEqual(sp_shape, out_shape) def testSignatureMismatch(self): - with self.test_session(): + with self.cached_session(): default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -190,7 +190,7 @@ class HashTableOpTest(test.TestCase): lookup_ops.KeyValueTensorInitializer(keys, values), "UNK") def testDTypes(self): - with self.test_session(): + with self.cached_session(): default_val = -1 
with self.assertRaises(TypeError): lookup_ops.HashTable( @@ -198,7 +198,7 @@ class HashTableOpTest(test.TestCase): dtypes.int64), default_val) def testNotInitialized(self): - with self.test_session(): + with self.cached_session(): default_val = -1 table = lookup_ops.HashTable( lookup_ops.KeyValueTensorInitializer( @@ -211,7 +211,7 @@ class HashTableOpTest(test.TestCase): output.eval() def testInitializeTwice(self): - with self.test_session(): + with self.cached_session(): default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2], dtypes.int64) @@ -223,7 +223,7 @@ class HashTableOpTest(test.TestCase): table.init.run() def testInitializationWithInvalidDimensions(self): - with self.test_session(): + with self.cached_session(): default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) values = constant_op.constant([0, 1, 2, 3, 4], dtypes.int64) @@ -272,7 +272,7 @@ class IndexTableFromFile(test.TestCase): def test_string_index_table_from_file(self): vocabulary_file = self._createVocabFile("f2i_vocab1.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1) ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"])) @@ -284,7 +284,7 @@ class IndexTableFromFile(test.TestCase): def test_string_index_table_from_multicolumn_file(self): vocabulary_file = self._createVocabFile( "f2i_vocab1.txt", values=("brain\t300", "salad\t20", "surgery\t1")) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1, @@ -299,7 +299,7 @@ class IndexTableFromFile(test.TestCase): def test_string_index_table_from_multicolumn_file_custom_delimiter(self): vocabulary_file = self._createVocabFile( "f2i_vocab1.txt", values=("brain 300", "salad 20", "surgery 1")) - with self.test_session(): + with self.cached_session(): 
table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1, @@ -314,7 +314,7 @@ class IndexTableFromFile(test.TestCase): def test_string_index_table_from_file_tensor_filename(self): vocabulary_file = self._createVocabFile("f2i_vocab1.txt") - with self.test_session(): + with self.cached_session(): vocabulary_file = constant_op.constant(vocabulary_file) table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1) @@ -328,7 +328,7 @@ class IndexTableFromFile(test.TestCase): def test_string_index_table_from_file_placeholder_filename(self): vocabulary_file = self._createVocabFile("f2i_vocab1.txt") - with self.test_session(): + with self.cached_session(): vocabulary_placeholder = array_ops.placeholder(dtypes.string, []) table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_placeholder, num_oov_buckets=1) @@ -344,7 +344,7 @@ class IndexTableFromFile(test.TestCase): def test_int32_index_table_from_file(self): vocabulary_file = self._createVocabFile( "f2i_vocab2.txt", values=("42", "1", "-1000")) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1, @@ -359,7 +359,7 @@ class IndexTableFromFile(test.TestCase): def test_int64_index_table_from_file(self): vocabulary_file = self._createVocabFile( "f2i_vocab3.txt", values=("42", "1", "-1000")) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1, @@ -374,7 +374,7 @@ class IndexTableFromFile(test.TestCase): def test_index_table_from_file_with_default_value(self): default_value = -42 vocabulary_file = self._createVocabFile("f2i_vocab4.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, default_value=default_value) ids = table.lookup(constant_op.constant(["salad", 
"surgery", "tarkus"])) @@ -385,7 +385,7 @@ class IndexTableFromFile(test.TestCase): def test_index_table_from_file_with_oov_buckets(self): vocabulary_file = self._createVocabFile("f2i_vocab5.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1000) ids = table.lookup( @@ -432,7 +432,7 @@ class IndexTableFromFile(test.TestCase): def test_index_table_from_file_with_vocab_size_too_small(self): vocabulary_file = self._createVocabFile("f2i_vocab6.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, vocab_size=2) ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"])) @@ -444,7 +444,7 @@ class IndexTableFromFile(test.TestCase): def test_index_table_from_file_with_vocab_size_too_large(self): vocabulary_file = self._createVocabFile("f2i_vocab7.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, vocab_size=4) self.assertRaisesRegexp(errors_impl.InvalidArgumentError, @@ -459,7 +459,7 @@ class IndexTableFromFile(test.TestCase): vocabulary_file=vocabulary_file, vocab_size=0) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, vocab_size=3) ids = table.lookup(constant_op.constant(["salad", "surgery", "tarkus"])) @@ -471,7 +471,7 @@ class IndexTableFromFile(test.TestCase): def test_index_table_from_file_with_invalid_hashers(self): vocabulary_file = self._createVocabFile("invalid_hasher.txt") - with self.test_session(): + with self.cached_session(): with self.assertRaises(TypeError): lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, @@ -490,14 +490,14 @@ class IndexTableFromFile(test.TestCase): def test_index_table_from_file_table_ref_with_oov_buckets(self): vocabulary_file = 
self._createVocabFile("f2i_vocab9.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=1) self.assertIsNotNone(table.table_ref) def test_index_table_from_file_table_ref_without_oov_buckets(self): vocabulary_file = self._createVocabFile("f2i_vocab10.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_file( vocabulary_file=vocabulary_file, num_oov_buckets=0) self.assertIsNotNone(table.table_ref) @@ -506,21 +506,21 @@ class IndexTableFromFile(test.TestCase): class KeyValueTensorInitializerTest(test.TestCase): def test_string(self): - with ops.Graph().as_default(), self.test_session(): + with ops.Graph().as_default(), self.cached_session(): init = lookup_ops.KeyValueTensorInitializer( ("brain", "salad", "surgery"), (0, 1, 2), dtypes.string, dtypes.int64) table = lookup_ops.HashTable(init, default_value=-1) table.init.run() def test_int64(self): - with ops.Graph().as_default(), self.test_session(): + with ops.Graph().as_default(), self.cached_session(): init = lookup_ops.KeyValueTensorInitializer((42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64) table = lookup_ops.HashTable(init, default_value=-1) table.init.run() def test_int32(self): - with ops.Graph().as_default(), self.test_session(): + with ops.Graph().as_default(), self.cached_session(): init = lookup_ops.KeyValueTensorInitializer((42, 1, -1000), (0, 1, 2), dtypes.int32, dtypes.int64) table = lookup_ops.HashTable(init, default_value=-1) @@ -532,7 +532,7 @@ class KeyValueTensorInitializerTest(test.TestCase): class IndexTableFromTensor(test.TestCase): def test_index_table_from_tensor_with_tensor_init(self): - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_tensor( vocabulary_list=("brain", "salad", "surgery"), num_oov_buckets=1) ids = table.lookup(constant_op.constant(("salad", "surgery", "tarkus"))) @@ -542,7 +542,7 @@ 
class IndexTableFromTensor(test.TestCase): self.assertAllEqual((1, 2, 3), ids.eval()) def test_int32_index_table_from_tensor_with_tensor_init(self): - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_tensor( vocabulary_list=(42, 1, -1000), num_oov_buckets=1, dtype=dtypes.int32) ids = table.lookup( @@ -553,7 +553,7 @@ class IndexTableFromTensor(test.TestCase): self.assertAllEqual((1, 2, 3), ids.eval()) def test_int64_index_table_from_tensor_with_tensor_init(self): - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_tensor( vocabulary_list=(42, 1, -1000), num_oov_buckets=1, dtype=dtypes.int64) ids = table.lookup( @@ -565,7 +565,7 @@ class IndexTableFromTensor(test.TestCase): def test_index_table_from_tensor_with_default_value(self): default_value = -42 - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_tensor( vocabulary_list=["brain", "salad", "surgery"], default_value=default_value) @@ -576,14 +576,14 @@ class IndexTableFromTensor(test.TestCase): self.assertAllEqual((1, 2, default_value), ids.eval()) def test_index_table_from_tensor_missing_vocabulary_list(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "vocabulary_list must be specified"): lookup_ops.index_table_from_tensor( vocabulary_list=None, num_oov_buckets=1) def test_index_table_from_tensor_empty_vocabulary_list(self): - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_table_from_tensor( vocabulary_list=np.array([], dtype=np.str_), num_oov_buckets=1) ids = table.lookup(constant_op.constant(["salad", "surgery", "brain"])) @@ -593,7 +593,7 @@ class IndexTableFromTensor(test.TestCase): lookup_ops.tables_initializer().run() def test_index_table_from_tensor_with_invalid_hashers(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(TypeError): 
lookup_ops.index_table_from_tensor( vocabulary_list=["brain", "salad", "surgery"], @@ -623,7 +623,7 @@ class IndexToStringTableFromFileTest(test.TestCase): type_funcs = [str, constant_op.constant] for type_func in type_funcs: vocabulary_file = type_func(vocabulary_path) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file) features = table.lookup( @@ -636,7 +636,7 @@ class IndexToStringTableFromFileTest(test.TestCase): def test_index_to_string_table_from_multicolumn_file(self): vocabulary_file = self._createVocabFile( "f2i_vocab1.txt", values=("brain\t300", "salad\t20", "surgery\t1")) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file, key_column_index=lookup_ops.TextFileIndex.LINE_NUMBER, @@ -650,7 +650,7 @@ class IndexToStringTableFromFileTest(test.TestCase): def test_index_to_string_table_from_multicolumn_file_custom_delimiter(self): vocabulary_file = self._createVocabFile( "f2i_vocab1.txt", values=("brain 300", "salad 20", "surgery 1")) - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file, key_column_index=lookup_ops.TextFileIndex.LINE_NUMBER, @@ -665,7 +665,7 @@ class IndexToStringTableFromFileTest(test.TestCase): def test_index_to_string_table_with_default_value(self): default_value = b"NONE" vocabulary_file = self._createVocabFile("f2i_vocab2.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file, default_value=default_value) features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64)) @@ -677,7 +677,7 @@ class IndexToStringTableFromFileTest(test.TestCase): def test_index_to_string_table_with_vocab_size_too_small(self): default_value = b"NONE" vocabulary_file = 
self._createVocabFile("f2i_vocab2.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file, vocab_size=2, @@ -690,7 +690,7 @@ class IndexToStringTableFromFileTest(test.TestCase): def test_index_to_string_table_with_vocab_size_too_large(self): vocabulary_file = self._createVocabFile("f2i_vocab6.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file, vocab_size=4) features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64)) @@ -702,7 +702,7 @@ class IndexToStringTableFromFileTest(test.TestCase): def test_index_to_string_table_with_vocab_size(self): vocabulary_file = self._createVocabFile("f2i_vocab7.txt") - with self.test_session(): + with self.cached_session(): table = lookup_ops.index_to_string_table_from_file( vocabulary_file=vocabulary_file, vocab_size=3) features = table.lookup(constant_op.constant([1, 2, 4], dtypes.int64)) @@ -715,7 +715,7 @@ class IndexToStringTableFromFileTest(test.TestCase): class IndexToStringTableFromTensorTest(test.TestCase): def test_index_to_string_table_from_tensor(self): - with self.test_session(): + with self.cached_session(): vocabulary_list = constant_op.constant(["brain", "salad", "surgery"]) table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=vocabulary_list) @@ -729,7 +729,7 @@ class IndexToStringTableFromTensorTest(test.TestCase): features.eval()) def test_duplicate_entries(self): - with self.test_session(): + with self.cached_session(): vocabulary_list = constant_op.constant(["hello", "hello"]) table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=vocabulary_list) @@ -740,7 +740,7 @@ class IndexToStringTableFromTensorTest(test.TestCase): def test_index_to_string_with_default_value(self): default_value = b"NONE" - with self.test_session(): + with self.cached_session(): vocabulary_list = 
constant_op.constant(["brain", "salad", "surgery"]) table = lookup_ops.index_to_string_table_from_tensor( vocabulary_list=vocabulary_list, default_value=default_value) @@ -764,7 +764,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInitializeStringTable(self): vocabulary_file = self._createVocabFile("one_column_1.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 table = lookup_ops.HashTable( lookup_ops.TextFileInitializer( @@ -782,7 +782,7 @@ class InitializeTableFromFileOpTest(test.TestCase): vocabulary_file = self._createVocabFile( "one_column_int64.txt", values=("42", "1", "-1000")) - with self.test_session(): + with self.cached_session(): default_value = -1 table = lookup_ops.HashTable( lookup_ops.TextFileInitializer( @@ -800,7 +800,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInitializeIndexTable(self): vocabulary_file = self._createVocabFile("one_column_2.txt") - with self.test_session(): + with self.cached_session(): default_value = "UNK" key_index = lookup_ops.TextFileIndex.LINE_NUMBER value_index = lookup_ops.TextFileIndex.WHOLE_LINE @@ -821,7 +821,7 @@ class InitializeTableFromFileOpTest(test.TestCase): with open(vocabulary_file, "w") as f: f.write("\n".join(["0\tbrain\t1", "1\tsalad\t5", "2\tsurgery\t6"]) + "\n") - with self.test_session(): + with self.cached_session(): default_value = -1 key_index = 1 value_index = 2 @@ -843,7 +843,7 @@ class InitializeTableFromFileOpTest(test.TestCase): with open(vocabulary_file, "w") as f: f.write("\n".join(["0\tbrain\t1", "1\tsalad\t5", "2\tsurgery\t6"]) + "\n") - with self.test_session(): + with self.cached_session(): default_value = -1 key_index = 2 value_index = 1 @@ -857,7 +857,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInvalidDataType(self): vocabulary_file = self._createVocabFile("one_column_3.txt") - with self.test_session(): + with self.cached_session(): default_value = "UNK" key_index = 
lookup_ops.TextFileIndex.WHOLE_LINE value_index = lookup_ops.TextFileIndex.LINE_NUMBER @@ -870,7 +870,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInvalidIndex(self): vocabulary_file = self._createVocabFile("one_column_4.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 key_index = 1 # second column of the line value_index = lookup_ops.TextFileIndex.LINE_NUMBER @@ -885,7 +885,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInitializeSameTableWithMultipleNodes(self): vocabulary_file = self._createVocabFile("one_column_5.txt") - with self.test_session() as sess: + with self.cached_session() as sess: shared_name = "shared-one-columm" default_value = -1 table1 = lookup_ops.HashTable( @@ -924,7 +924,7 @@ class InitializeTableFromFileOpTest(test.TestCase): self.assertAllEqual([0, 1, -1], out3) def testInitializeTableWithNoFilename(self): - with self.test_session(): + with self.cached_session(): default_value = -1 with self.assertRaises(ValueError): lookup_ops.HashTable( @@ -934,7 +934,7 @@ class InitializeTableFromFileOpTest(test.TestCase): default_value) def testInitializeWithVocabSize(self): - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 vocabulary_file1 = self._createVocabFile("one_column6.txt") @@ -982,7 +982,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testFeedVocabularyName(self): vocabulary_file = self._createVocabFile("feed_vocabulary.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 table = lookup_ops.HashTable( lookup_ops.TextFileInitializer( @@ -1008,7 +1008,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInvalidFilenames(self): vocabulary_file = self._createVocabFile("filename_shape.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 # Invalid data type @@ -1031,7 +1031,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def 
testIdToStringTable(self): vocab_file = self._createVocabFile("feat_to_id_1.txt") - with self.test_session(): + with self.cached_session(): default_value = "UNK" vocab_size = 3 table = lookup_ops.HashTable( @@ -1048,7 +1048,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testStringToIdTable(self): vocab_file = self._createVocabFile("feat_to_id_2.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 table = lookup_ops.HashTable( @@ -1065,7 +1065,7 @@ class InitializeTableFromFileOpTest(test.TestCase): def testInt64ToIdTable(self): vocab_file = self._createVocabFile( "feat_to_id_3.txt", values=("42", "1", "-1000")) - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 table = lookup_ops.HashTable( @@ -1090,7 +1090,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testStringIdTableWithHashBuckets(self): vocab_file = self._createVocabFile("feat_to_id_1.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 oov_buckets = 1 @@ -1110,7 +1110,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testInt32IdTableWithHashBuckets(self): vocab_file = self._createVocabFile("feat_to_id_2.txt", ("42", "1", "-1000")) - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 oov_buckets = 1 @@ -1132,7 +1132,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testInt64IdTableWithHashBuckets(self): vocab_file = self._createVocabFile("feat_to_id_3.txt", ("42", "1", "-1000")) - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 oov_buckets = 1 @@ -1151,7 +1151,7 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertEquals(vocab_size + oov_buckets, table.size().eval()) def testStringIdTableWithOnlyHashBucket(self): - with self.test_session(): + with self.cached_session(): oov_buckets = 5 # Set a table that only uses hash buckets, for each input value 
returns @@ -1172,7 +1172,7 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertEquals(oov_buckets, table.size().eval()) def testInt32IdTableWithOnlyHashBucket(self): - with self.test_session(): + with self.cached_session(): oov_buckets = 5 # Set a table that only uses hash buckets, for each input value returns @@ -1194,20 +1194,20 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertEquals(oov_buckets, table.size().eval()) def testFloat64IdTableWithOnlyHashBucket(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(TypeError, "Invalid key_dtype"): lookup_ops.IdTableWithHashBuckets( None, num_oov_buckets=5, key_dtype=dtypes.float64) def testBoolIdTableWithOnlyHashBucket(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(TypeError, "Invalid key_dtype"): lookup_ops.IdTableWithHashBuckets( None, num_oov_buckets=5, key_dtype=dtypes.bool) def testIdTableWithHashBucketsWithMultipleInitializers(self): vocab_file = self._createVocabFile("feat_to_id_4.txt") - with self.test_session() as sess: + with self.cached_session() as sess: default_value = -1 vocab_size = 3 oov_buckets = 3 @@ -1248,7 +1248,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testIdTableWithHashBucketsInitializationAcrossSessions(self): vocab_file = self._createVocabFile("feat_to_id_5.txt") shared_name = "across-sessions" - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 oov_buckets = 1 @@ -1269,7 +1269,7 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertAllEqual([0, 1, 2, 3], out1.eval()) self.assertEquals(vocab_size + oov_buckets, table1.size().eval()) - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 oov_buckets = 1 @@ -1292,7 +1292,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testIdTableWithHashBucketsWithMultipleInitializersDifferentDefault(self): vocab_file = 
self._createVocabFile("feat_to_id_6.txt") - with self.test_session() as sess: + with self.cached_session() as sess: default_value1 = -1 vocab_size = 3 oov_buckets = 0 @@ -1328,7 +1328,7 @@ class IdTableWithHashBucketsTest(test.TestCase): vocab_file = self._createVocabFile("feat_to_id_7.txt") input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] - with self.test_session() as sess: + with self.cached_session() as sess: sp_features = sparse_tensor.SparseTensor( constant_op.constant(input_indices, dtypes.int64), constant_op.constant(["brain", "salad", "brain", "surgery", "tarkus"], @@ -1355,7 +1355,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testInt32SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] - with self.test_session() as sess: + with self.cached_session() as sess: sp_features = sparse_tensor.SparseTensor( constant_op.constant(input_indices, dtypes.int64), constant_op.constant([42, 1, 42, -1000, 11], dtypes.int32), @@ -1383,7 +1383,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testInt64SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] - with self.test_session() as sess: + with self.cached_session() as sess: sp_features = sparse_tensor.SparseTensor( constant_op.constant(input_indices, dtypes.int64), constant_op.constant([42, 1, 42, -1000, 11], dtypes.int64), @@ -1410,7 +1410,7 @@ class IdTableWithHashBucketsTest(test.TestCase): def testIdTableWithHashBucketsWithInvalidHashers(self): vocab_file = self._createVocabFile("feat_to_id_4.txt") - with self.test_session(): + with self.cached_session(): default_value = -1 vocab_size = 3 oov_buckets = 1 @@ -1451,7 +1451,7 @@ class IdTableWithHashBucketsTest(test.TestCase): hasher_spec=lookup_ops.StrongHashSpec([None, 2])) def testIdTableWithHashBucketsNoInnerTable(self): - with self.test_session(): + with self.cached_session(): table = lookup_ops.IdTableWithHashBuckets(None, 
num_oov_buckets=1) self.assertIsNone(table.table_ref) diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index 87fc715783..3ce0b74263 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -61,62 +61,62 @@ class AbsoluteDifferenceLossTest(test.TestCase): self._labels = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) def testValueErrorThrownWhenWeightIsNone(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.absolute_difference( self._predictions, self._predictions, weights=None) def testAllCorrectNoLossWeight(self): loss = losses.absolute_difference(self._predictions, self._predictions) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) def testNonZeroLoss(self): loss = losses.absolute_difference(self._labels, self._predictions) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(5.5, loss.eval(), 3) def testNonZeroLossWithPythonScalarWeight(self): weights = 2.3 loss = losses.absolute_difference(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(5.5 * weights, loss.eval(), 3) def testNonZeroLossWithScalarTensorWeight(self): weights = 2.3 loss = losses.absolute_difference(self._labels, self._predictions, constant_op.constant(weights)) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(5.5 * weights, loss.eval(), 3) def testNonZeroLossWithOneDimBatchSpecificWeights(self): weights = constant_op.constant((1.2, 0.0), shape=(2, 1)) loss = losses.absolute_difference(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(5.6, loss.eval(), 3) def testNonZeroLossWithTwoDimBatchSpecificWeights(self): weights = constant_op.constant([1.2, 0.0], shape=[2, 
1]) loss = losses.absolute_difference(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(5.6, loss.eval(), 3) def testNonZeroLossWithSampleSpecificWeights(self): weights = constant_op.constant([3, 6, 5, 0, 4, 2], shape=[2, 3]) loss = losses.absolute_difference(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(16.6, loss.eval(), 3) def testNonZeroLossWithSampleSpecificWeightsMostZero(self): weights = constant_op.constant([0, 0, 0, 0, 0, 2], shape=[2, 3]) loss = losses.absolute_difference(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(6.0, loss.eval(), 3) def testLossWithSampleSpecificWeightsAllZero(self): weights = array_ops.zeros((2, 3)) loss = losses.absolute_difference(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) @test_util.assert_no_new_pyobjects_executing_eagerly @@ -134,12 +134,12 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.softmax_cross_entropy(labels, logits, weights=None) def testAllCorrect(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) @@ -152,7 +152,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) - with self.test_session(): + with self.cached_session(): loss = losses.softmax_cross_entropy(labels, logits) 
self.assertEquals(loss.op.name, 'softmax_cross_entropy_loss/value') self.assertAlmostEqual(loss.eval(), 10.0, 3) @@ -162,7 +162,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) weights = 2.3 - with self.test_session(): + with self.cached_session(): loss = losses.softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) @@ -171,7 +171,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) weights = 2.3 - with self.test_session(): + with self.cached_session(): loss = losses.softmax_cross_entropy(labels, logits, constant_op.constant(weights)) self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) @@ -181,7 +181,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) weights = constant_op.constant((1.2, 3.4, 5.6)) - with self.test_session(): + with self.cached_session(): loss = losses.softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3) @@ -190,7 +190,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) weights = constant_op.constant([0, 0, 0], shape=[3]) - with self.test_session(): + with self.cached_session(): loss = losses.softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(0.0, loss.eval(), 3) @@ -199,12 +199,12 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], [1, 0, 0], [0, 1, 0]]) weights = constant_op.constant([1.2, 0, 0], shape=[3]) - with self.test_session(): + with self.cached_session(): loss = losses.softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(12.0, loss.eval(), 3) def 
testSoftmaxWithMeasurementSpecificWeightsRaisesException(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -215,7 +215,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): losses.softmax_cross_entropy(labels, logits, weights=weights).eval() def testSoftmaxLabelSmoothing(self): - with self.test_session(): + with self.cached_session(): # Softmax Cross Entropy Loss is: # -\sum_i p_i \log q_i # where for a softmax activation @@ -242,12 +242,12 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0], [1], [2]]) - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.sparse_softmax_cross_entropy(labels, logits, weights=None) def testAllCorrectInt32Labels(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0], [1], [2]], dtype=dtypes.int32) @@ -263,7 +263,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): losses.sparse_softmax_cross_entropy(labels, logits) def testAllCorrectInt64Labels(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0], [1], [2]], dtype=dtypes.int64) @@ -272,7 +272,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): self.assertAlmostEqual(loss.eval(), 0.0, 3) def testAllCorrectNonColumnLabels(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([0, 1, 2]) @@ -285,7 +285,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = 
constant_op.constant([[2], [0], [1]], dtype=dtypes.int32) - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits) self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value') self.assertAlmostEqual(loss.eval(), 10.0, 3) @@ -295,7 +295,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]], dtype=dtypes.int64) - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits) self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value') self.assertAlmostEqual(loss.eval(), 10.0, 3) @@ -305,7 +305,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([2, 0, 1]) - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits) self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value') self.assertAlmostEqual(loss.eval(), 10.0, 3) @@ -315,7 +315,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = 2.3 - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) @@ -324,7 +324,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = 2.3 - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, constant_op.constant(weights)) self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) @@ -334,7 +334,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = 2.3 - with self.test_session(): + with self.cached_session(): loss = 
losses.sparse_softmax_cross_entropy( labels, logits, constant_op.constant((weights,))) self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) @@ -345,7 +345,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = array_ops.placeholder(dtypes.float32) - with self.test_session() as sess: + with self.cached_session() as sess: loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) loss_val = sess.run(loss, feed_dict={weights: ((1.2,), (3.4,), (5.6,))}) @@ -355,7 +355,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): logits = array_ops.placeholder(dtypes.float32) labels = array_ops.placeholder(dtypes.int32) weights = 1.0 - with self.test_session() as sess: + with self.cached_session() as sess: loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) loss_val = sess.run(loss, feed_dict={ @@ -370,7 +370,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): logits = array_ops.placeholder(dtypes.float32, shape=(None, 3)) labels = array_ops.placeholder(dtypes.int32, shape=(None, 1)) weights = array_ops.placeholder(dtypes.float32) - with self.test_session() as sess: + with self.cached_session() as sess: loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) loss_val = sess.run(loss, feed_dict={ @@ -387,7 +387,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = constant_op.constant([1.2, 3.4, 5.6], shape=(3, 1)) - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3) @@ -396,7 +396,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = constant_op.constant([[1.2], [3.4], [5.6]]) - with self.test_session(): + with self.cached_session(): loss 
= losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3) @@ -405,7 +405,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = constant_op.constant([0, 0, 0], shape=(3, 1)) - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(0.0, loss.eval(), 3) @@ -414,12 +414,12 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) weights = constant_op.constant([1.2, 0, 0], shape=(3, 1)) - with self.test_session(): + with self.cached_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(12.0, loss.eval(), 3) def testMeasurementSpecificWeightsRaisesException(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -432,7 +432,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): def testInconsistentWeightSizeRaisesException(self): """The weight tensor has incorrect number of elements.""" - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -445,7 +445,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): def testInconsistentLabelSizeRaisesException(self): """The label tensor has incorrect number of elements.""" - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -458,7 +458,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): def testInconsistentWeightShapeRaisesException(self): """The weight tensor has incorrect shape.""" - with 
self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0, -100.0], [-100.0, 100.0, -100.0, -100.0], [-100.0, -100.0, 100.0, -100.0], @@ -472,7 +472,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): def testInconsistentLabelShapeRaisesException(self): """The label tensor has incorrect shape.""" - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0, -100.0], [-100.0, 100.0, -100.0, -100.0], [-100.0, -100.0, 100.0, -100.0], @@ -488,7 +488,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): class SigmoidCrossEntropyLossTest(test.TestCase): def testAllCorrectSigmoid(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -506,7 +506,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): loss = losses.sigmoid_cross_entropy(labels, logits, weights) self.assertEquals(logits.dtype, loss.dtype) - with self.test_session() as sess: + with self.cached_session() as sess: loss = sess.run(loss, feed_dict={ logits: np.ones((32, 1)), @@ -522,7 +522,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): loss = losses.sigmoid_cross_entropy(labels, logits, weights) self.assertEquals(logits.dtype, loss.dtype) - with self.test_session() as sess: + with self.cached_session() as sess: loss = sess.run(loss, feed_dict={ logits: np.ones((32, 2)), @@ -531,7 +531,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): self.assertAlmostEqual(0.313, loss, 3) def testAllWrongSigmoid(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -542,7 +542,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): self.assertAlmostEqual(loss.eval(), 600.0 / 9.0, 3) def testAllWrongSigmoidWithMeasurementSpecificWeights(self): - with self.test_session(): 
+ with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0], [-100.0, 100.0, -100.0], [-100.0, -100.0, 100.0]]) @@ -562,7 +562,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): self.assertEquals(logits.dtype, loss.dtype) self.assertEquals('sigmoid_cross_entropy_loss/value', loss.op.name) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) def testSigmoidFloat64(self): @@ -577,7 +577,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): loss = losses.sigmoid_cross_entropy(labels, logits) self.assertEquals(logits.dtype, loss.dtype) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(44.444, loss.eval(), 3) def testSigmoidNoReduction(self): @@ -590,7 +590,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): labels, logits, reduction=losses.Reduction.NONE) self.assertEquals(logits.dtype, loss.dtype) - with self.test_session(): + with self.cached_session(): self.assertAllClose(( (0., 0., 0.), (0., 100., 100.), @@ -598,7 +598,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): ), loss.eval(), 3) def testSigmoidLabelSmoothingCorrect(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[100.0, -100.0, -100.0]]) labels = constant_op.constant([[1, 0, 1]]) # Sigmoid cross entropy loss is: @@ -621,7 +621,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): self.assertAlmostEqual(loss.eval(), expected_value, 3) def testSigmoidLabelSmoothingEqualsSoftmaxTwoLabel(self): - with self.test_session(): + with self.cached_session(): label_smoothing = 0.1 sigmoid_logits = constant_op.constant([[100.0, -100.0, -100.0]]) sigmoid_labels = constant_op.constant([[1, 0, 1]]) @@ -656,33 +656,33 @@ class LogLossTest(test.TestCase): self._labels = constant_op.constant(labels) def testValueErrorThrownWhenWeightIsNone(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): 
losses.log_loss(self._labels, self._labels, weights=None) def testAllCorrectNoLossWeight(self): loss = losses.log_loss(self._labels, self._labels) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) def testAllCorrectNoLossWeightWithPlaceholder(self): tf_predictions = array_ops.placeholder( dtypes.float32, shape=self._np_labels.shape) loss = losses.log_loss(self._labels, tf_predictions) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual( 0.0, loss.eval(feed_dict={tf_predictions: self._np_labels}), 3) def testNonZeroLoss(self): loss = losses.log_loss(self._labels, self._predictions) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(-np.sum(self._expected_losses) / 6.0, loss.eval(), 3) def testNonZeroLossWithPythonScalarWeight(self): weights = 2.3 loss = losses.log_loss(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0, loss.eval(), 3) @@ -690,7 +690,7 @@ class LogLossTest(test.TestCase): weights = 2.3 loss = losses.log_loss(self._labels, self._predictions, constant_op.constant(weights)) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0, loss.eval(), 3) @@ -700,7 +700,7 @@ class LogLossTest(test.TestCase): weights = 2.3 loss = losses.log_loss(self._labels, tf_predictions, constant_op.constant(weights)) - with self.test_session() as sess: + with self.cached_session() as sess: loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions}) self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0, loss, 3) @@ -710,7 +710,7 @@ class LogLossTest(test.TestCase): weights = 2.3 loss = losses.log_loss(self._labels, tf_predictions, constant_op.constant(weights)) - with self.test_session() as sess: + with self.cached_session() as sess: loss = 
sess.run(loss, feed_dict={tf_predictions: self._np_predictions}) self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0, loss, 3) @@ -721,7 +721,7 @@ class LogLossTest(test.TestCase): self._expected_losses, np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3))) loss = losses.log_loss(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(-np.sum(expected_losses) / 6.0, loss.eval(), 3) def testNonZeroLossWithOneDimBatchSpecificWeightsSomeZero(self): @@ -730,7 +730,7 @@ class LogLossTest(test.TestCase): np.asarray([1.2, 1.2, 1.2, 0, 0, 0]).reshape( (2, 3))) loss = losses.log_loss(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(-np.sum(expected_losses) / 3.0, loss.eval(), 3) def testNonZeroLossWithTwoDimBatchSpecificWeightsSomeZero(self): @@ -739,12 +739,12 @@ class LogLossTest(test.TestCase): np.asarray([1.2, 1.2, 1.2, 0, 0, 0]).reshape( (2, 3))) loss = losses.log_loss(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(-np.sum(expected_losses) / 3.0, loss.eval(), 3) def testWeightsWithSameNumDimsButWrongShapeThrowsException(self): weights = constant_op.constant(np.random.normal(size=(2, 4)), shape=[2, 4]) - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.log_loss(self._labels, self._predictions, weights) @@ -757,7 +757,7 @@ class LogLossTest(test.TestCase): self._predictions, constant_op.constant( weights, shape=(2, 3))) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(-np.sum(expected_losses) / 5.0, loss.eval(), 3) def testNonZeroLossWithMeasurementSpecificWeightsWithPlaceholder(self): @@ -771,7 +771,7 @@ class LogLossTest(test.TestCase): constant_op.constant( weights, shape=(2, 3))) - with self.test_session() as sess: + with self.cached_session() as 
sess: loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions}) self.assertAlmostEqual(-np.sum(expected_losses) / 5.0, loss, 3) @@ -784,7 +784,7 @@ class LogLossTest(test.TestCase): self._predictions, constant_op.constant( weights, shape=(2, 3))) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(-np.sum(expected_losses), loss.eval(), 3) def testNonZeroLossWithSampleSpecificWeightsMostZeroWithPlaceholder(self): @@ -795,35 +795,35 @@ class LogLossTest(test.TestCase): tf_weights = constant_op.constant(weights, shape=(2, 3)) loss = losses.log_loss(self._labels, tf_predictions, tf_weights) - with self.test_session() as sess: + with self.cached_session() as sess: loss = sess.run(loss, feed_dict={tf_predictions: self._np_predictions}) self.assertAlmostEqual(-np.sum(expected_losses), loss, 3) def testLossWithSampleSpecificWeightsAllZero(self): tf_weights = array_ops.zeros(shape=(2, 3)) loss = losses.log_loss(self._labels, self._predictions, tf_weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) class HingeLossTest(test.TestCase): def testIncompatibleShapes(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[-1.0], [2.1]]) labels = constant_op.constant([0.0, 1.0]) with self.assertRaises(ValueError): _ = losses.hinge_loss(labels, logits).eval() def testAllOutsideMargin(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([1.2, -1.4, -1.0, 2.1]) labels = constant_op.constant([1.0, 0.0, 0.0, 1.0]) loss = losses.hinge_loss(labels, logits) self.assertAllClose(loss.eval(), 0.0, atol=1e-3) def testSomeInsideMargin(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[-0.7], [-1.4], [1.4], [0.6]]) labels = constant_op.constant([[0.0], [0.0], [1.0], [1.0]]) loss = losses.hinge_loss(labels, logits) @@ -832,7 +832,7 @@ class HingeLossTest(test.TestCase): 
self.assertAllClose(loss.eval(), 0.175, atol=1e-3) def testSomeMisclassified(self): - with self.test_session(): + with self.cached_session(): logits = constant_op.constant([[[1.2], [0.4], [-1.0], [-1.1]]]) labels = constant_op.constant([[[1.0], [0.0], [0.0], [1.0]]]) loss = losses.hinge_loss(labels, logits) @@ -844,14 +844,14 @@ class HingeLossTest(test.TestCase): class HuberLossTest(test.TestCase): def testIncompatibleShapes(self): - with self.test_session(): + with self.cached_session(): predictions = constant_op.constant([[-1.0], [2.1]]) labels = constant_op.constant([0.0, 1.0]) with self.assertRaises(ValueError): _ = losses.huber_loss(labels, predictions).eval() def testAllQuadratic(self): - with self.test_session(): + with self.cached_session(): predictions = constant_op.constant([1.5, -1.4, -1.0, 0.0]) labels = constant_op.constant([1.0, -1.0, 0.0, 0.5]) loss = losses.huber_loss(labels, predictions) @@ -859,7 +859,7 @@ class HuberLossTest(test.TestCase): 0.5 * (0.25 + 0.16 + 1.0 + 0.25) / 4., atol=1e-5) def testAllLinear(self): - with self.test_session(): + with self.cached_session(): predictions = constant_op.constant([1.5, -1.4, -1.0, 0.0]) labels = constant_op.constant([0.0, 1.0, 0.0, 1.5]) loss = losses.huber_loss(labels, predictions) @@ -867,7 +867,7 @@ class HuberLossTest(test.TestCase): (1.5 + 2.4 + 1.0 + 1.5) / 4. 
- 0.5, atol=1e-5) def testMixedQuadraticLinear(self): - with self.test_session(): + with self.cached_session(): predictions = constant_op.constant([[1.5, -1.4, -1.0, 0.0], [1.5, -1.4, -1.0, 0.0]]) labels = constant_op.constant([[1.0, -1.0, 0.0, 0.5], @@ -879,7 +879,7 @@ class HuberLossTest(test.TestCase): self.assertAllClose(loss.eval(), expected_loss, atol=1e-5) def testAllQuadraticDelta(self): - with self.test_session(): + with self.cached_session(): delta = 0.5 predictions = constant_op.constant([1.5, -1.4, -0.5, 0.0]) labels = constant_op.constant([1.0, -1.0, 0.0, 0.5]) @@ -894,7 +894,7 @@ class HuberLossTest(test.TestCase): expected = delta * np.array([1.5, 2.4, 1.0, 1.5]).mean() expected -= 0.5 * delta**2 loss = losses.huber_loss(labels, predictions, delta=delta) - with self.test_session(): + with self.cached_session(): self.assertAllClose(expected, loss.eval(), atol=1e-5) @@ -906,13 +906,13 @@ class MeanSquaredErrorTest(test.TestCase): self._labels = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) def testValueErrorThrownWhenWeightIsNone(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.mean_squared_error( self._predictions, self._predictions, weights=None) def testScalar(self): - with self.test_session(): + with self.cached_session(): self.assertEqual( 0.0, losses.mean_squared_error(predictions=constant_op.constant(0), @@ -920,55 +920,55 @@ class MeanSquaredErrorTest(test.TestCase): def testAllCorrectNoLossWeight(self): loss = losses.mean_squared_error(self._predictions, self._predictions) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) def testNonZeroLoss(self): loss = losses.mean_squared_error(self._labels, self._predictions) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(49.5, loss.eval(), 3) def testNonZeroLossWithPythonScalarWeight(self): weights = 2.3 loss = losses.mean_squared_error(self._labels, 
self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(49.5 * weights, loss.eval(), 3) def testNonZeroLossWithScalarTensorWeight(self): weights = 2.3 loss = losses.mean_squared_error(self._labels, self._predictions, constant_op.constant(weights)) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(49.5 * weights, loss.eval(), 3) def testNonZeroLossWithOneDimBatchSpecificWeights(self): weights = constant_op.constant([1.2, 3.4], shape=(2, 1)) loss = losses.mean_squared_error(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(767.8 / 6.0, loss.eval(), 3) def testNonZeroLossWithTwoDimBatchSpecificWeights(self): weights = constant_op.constant([1.2, 3.4], shape=[2, 1]) loss = losses.mean_squared_error(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(767.8 / 6.0, loss.eval(), 3) def testNonZeroLossWithSampleSpecificWeights(self): weights = constant_op.constant([3, 6, 5, 0, 4, 2], shape=[2, 3]) loss = losses.mean_squared_error(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(587 / 5.0, loss.eval(), 3) def testNonZeroLossWithSampleSpecificWeightsMostZero(self): weights = constant_op.constant([0, 0, 0, 0, 0, 2], shape=[2, 3]) loss = losses.mean_squared_error(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(18.0, loss.eval(), 3) def testLossWithSampleSpecificWeightsAllZero(self): weights = array_ops.zeros((2, 3)) loss = losses.mean_squared_error(self._labels, self._predictions, weights) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0.0, loss.eval(), 3) @@ -994,7 +994,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): self._expected_losses = np.divide(total, 3.0) def 
testValueErrorThrownWhenWeightIsNone(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.mean_pairwise_squared_error( predictions=constant_op.constant(self._labels), @@ -1003,7 +1003,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): def _test_valid_weights( self, labels, predictions, expected_loss, weights=1.0): - with self.test_session(): + with self.cached_session(): static_inputs_op = losses.mean_pairwise_squared_error( predictions=predictions, labels=labels, weights=weights) self.assertAlmostEqual(expected_loss, static_inputs_op.eval(), places=3) @@ -1054,7 +1054,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): init_op = variables.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) for grad, _ in gradients_to_variables: np_grad = sess.run(grad) @@ -1073,7 +1073,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): predictions=constant_op.constant(self._predictions), labels=constant_op.constant(self._labels), weights=constant_op.constant(weights)) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(weights * np.sum(self._expected_losses), loss.eval(), 3) @@ -1122,7 +1122,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): predictions=predictions_placeholder, labels=labels_placeholder, weights=weights_placeholder) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): dynamic_inputs_op.eval(feed_dict={ predictions_placeholder: predictions, @@ -1191,7 +1191,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): labels=array_ops.concat([labels0, labels1], 0), predictions=array_ops.concat([predictions0, predictions1], 0)) - with self.test_session() as session: + with self.cached_session() as session: loss0, loss1, loss0_1 = session.run([loss0, loss1, loss0_1]) self.assertTrue(loss0 > 0) @@ -1216,7 +1216,7 @@ class 
CosineDistanceLossTest(test.TestCase): [0, 0, 1], [0, 1, 0]]).reshape((3, 2, 3)) def testValueErrorThrownWhenWeightIsNone(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): losses.cosine_distance( predictions=constant_op.constant(self._labels), @@ -1229,7 +1229,7 @@ class CosineDistanceLossTest(test.TestCase): predictions=constant_op.constant(self._labels), labels=constant_op.constant(self._labels), dim=2) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(0, loss.eval(), 5) def testPartiallyCorrectWithIntegerValues(self): @@ -1237,7 +1237,7 @@ class CosineDistanceLossTest(test.TestCase): predictions=constant_op.constant(self._predictions), labels=constant_op.constant(self._labels), dim=2) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(1, loss.eval(), 5) def testPartiallyCorrectFloatingPointValues(self): @@ -1255,7 +1255,7 @@ class CosineDistanceLossTest(test.TestCase): labels, shape=(3, 1, 3), dtype=dtypes.float32) loss = losses.cosine_distance(tf_labels, tf_preds, dim=2) - with self.test_session(): + with self.cached_session(): self.assertAlmostEqual(1.0, loss.eval(), 5) def testSampleSpecificWeights(self): @@ -1264,7 +1264,7 @@ class CosineDistanceLossTest(test.TestCase): labels=constant_op.constant(self._labels), dim=2, weights=np.asarray((1, 0, 0)).reshape((3, 1, 1))) - with self.test_session(): + with self.cached_session(): self.assertEqual(1.0, loss.eval()) def testMeasurementSpecificWeights(self): @@ -1274,7 +1274,7 @@ class CosineDistanceLossTest(test.TestCase): dim=2, weights=constant_op.constant( [1, 0, 0, 1, 1, 1], shape=(3, 2, 1))) - with self.test_session(): + with self.cached_session(): self.assertEqual(3.0 / 4.0, loss.eval()) def testMeasurementSpecificWeightsWithPlaceholderWithShape(self): @@ -1286,7 +1286,7 @@ class CosineDistanceLossTest(test.TestCase): dim=2, weights=constant_op.constant( [1, 0, 0, 1, 1, 1], shape=(3, 2, 1))) - with 
self.test_session() as sess: + with self.cached_session() as sess: loss = sess.run(loss, feed_dict={tf_predictions: self._predictions}) self.assertEqual(3.0 / 4.0, loss) @@ -1296,7 +1296,7 @@ class CosineDistanceLossTest(test.TestCase): labels=constant_op.constant(self._labels), dim=2, weights=array_ops.zeros((3, 1, 1))) - with self.test_session(): + with self.cached_session(): self.assertEqual(0, loss.eval()) def testZeroLossWhenAllMeasurementSpecificWeightsAreZero(self): @@ -1305,7 +1305,7 @@ class CosineDistanceLossTest(test.TestCase): labels=constant_op.constant(self._labels), dim=2, weights=array_ops.zeros((3, 2, 1))) - with self.test_session(): + with self.cached_session(): self.assertEqual(0, loss.eval()) @@ -1411,7 +1411,7 @@ class ComputeWeightedLossTest(test.TestCase): weighted_loss = losses.compute_weighted_loss( self._raw_losses, weights=weight) self.assertEqual(1, len(util.get_losses())) - with self.test_session(): + with self.cached_session(): self.assertAllClose( np.mean(weight * self._raw_losses), weighted_loss.eval()) @@ -1429,7 +1429,7 @@ class ComputeWeightedLossTest(test.TestCase): weighted_loss = losses.compute_weighted_loss( self._raw_losses, weights=weights_placeholder) self.assertEqual(1, len(util.get_losses())) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): weighted_loss.eval(feed_dict={weights_placeholder: weights}) @@ -1452,7 +1452,7 @@ class ComputeWeightedLossTest(test.TestCase): weighted_loss = losses.compute_weighted_loss( raw_losses, weights=weights_placeholder) self.assertEqual(1, len(util.get_losses())) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): weighted_loss.eval(feed_dict={weights_placeholder: weights}) diff --git a/tensorflow/python/kernel_tests/manip_ops_test.py b/tensorflow/python/kernel_tests/manip_ops_test.py index dc3ea38671..f71857a3cb 100644 --- 
a/tensorflow/python/kernel_tests/manip_ops_test.py +++ b/tensorflow/python/kernel_tests/manip_ops_test.py @@ -42,12 +42,12 @@ class RollTest(test_util.TensorFlowTestCase): def _testRoll(self, np_input, shift, axis): expected_roll = np.roll(np_input, shift, axis) - with self.test_session(): + with self.cached_session(): roll = manip_ops.roll(np_input, shift, axis) self.assertAllEqual(roll.eval(), expected_roll) def _testGradient(self, np_input, shift, axis): - with self.test_session(): + with self.cached_session(): inx = constant_op.constant(np_input.tolist()) xs = list(np_input.shape) y = manip_ops.roll(inx, shift, axis) @@ -94,7 +94,7 @@ class RollTest(test_util.TensorFlowTestCase): self._testAll(np.random.randint(-100, 100, (5)).astype(np.int32), 3, -1) self._testAll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), 3, -2) # Make sure negative axis should be 0 <= axis + dims < dims - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of range"): manip_ops.roll(np.random.randint(-100, 100, (4, 4)).astype(np.int32), @@ -111,7 +111,7 @@ class RollTest(test_util.TensorFlowTestCase): tensor = array_ops.placeholder(dtype=dtypes.int32) shift = 1 axis = 0 - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "input must be 1-D or higher"): manip_ops.roll(tensor, shift, axis).eval(feed_dict={tensor: 7}) @@ -127,7 +127,7 @@ class RollTest(test_util.TensorFlowTestCase): tensor = [[1, 2], [3, 4]] shift = 1 axis = array_ops.placeholder(dtype=dtypes.int32) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "axis must be a scalar or a 1-D vector"): manip_ops.roll(tensor, shift, axis).eval(feed_dict={axis: [[0, 1]]}) @@ -143,7 +143,7 @@ class RollTest(test_util.TensorFlowTestCase): tensor = [[1, 2], [3, 4]] shift = array_ops.placeholder(dtype=dtypes.int32) axis = 1 
- with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "shift must be a scalar or a 1-D vector"): manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [[0, 1]]}) @@ -158,7 +158,7 @@ class RollTest(test_util.TensorFlowTestCase): tensor = [[1, 2], [3, 4]] shift = array_ops.placeholder(dtype=dtypes.int32) axis = [0, 1] - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "shift and axis must have the same size"): manip_ops.roll(tensor, shift, axis).eval(feed_dict={shift: [1]}) @@ -167,7 +167,7 @@ class RollTest(test_util.TensorFlowTestCase): tensor = [1, 2] shift = 1 axis = 1 - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "is out of range"): manip_ops.roll(tensor, shift, axis).eval() diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py index b167278984..309da8f184 100644 --- a/tensorflow/python/kernel_tests/matmul_op_test.py +++ b/tensorflow/python/kernel_tests/matmul_op_test.py @@ -206,7 +206,7 @@ class MatMulInfixOperatorTest(test_lib.TestCase): b = ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0], [80.0, 90.0]]) c = infix_matmul(a, b) d = math_ops.matmul(a, b) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(c.eval(), d.eval()) diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py index f41967ff98..720ba806e9 100644 --- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py @@ -114,7 +114,7 @@ class InverseOpTest(test.TestCase): def testNotInvertible(self): # The input should be invertible. 
- with self.test_session(): + with self.cached_session(): with self.assertRaisesOpError("Input is not invertible."): # All rows of the matrix below add to zero. tensor3 = constant_op.constant([[1., 0., -1.], [-1., 1., 0.], diff --git a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py index 33288392c0..dd01ba11af 100644 --- a/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_triangular_solve_op_test.py @@ -143,7 +143,7 @@ class MatrixTriangularSolveOpTest(test.TestCase): def testNonSquareMatrix(self): # A non-square matrix should cause an error. matrix = np.array([[1., 2., 3.], [3., 4., 5.]]) - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): self._verifySolve(matrix, matrix) with self.assertRaises(ValueError): @@ -154,7 +154,7 @@ class MatrixTriangularSolveOpTest(test.TestCase): # right-hand sides. matrix = np.array([[1., 0.], [0., 1.]]) rhs = np.array([[1., 0.]]) - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): self._verifySolve(matrix, rhs) with self.assertRaises(ValueError): @@ -164,7 +164,7 @@ class MatrixTriangularSolveOpTest(test.TestCase): # The input should be invertible. # The matrix is singular because it has a zero on the diagonal. 
singular_matrix = np.array([[1., 0., -1.], [-1., 0., 1.], [0., -1., 1.]]) - with self.test_session(): + with self.cached_session(): with self.assertRaisesOpError("Input matrix is not invertible."): self._verifySolve(singular_matrix, singular_matrix) with self.assertRaisesOpError("Input matrix is not invertible."): diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index 55653489af..5dcdb9e420 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -192,7 +192,7 @@ class MeanTest(test.TestCase): self.assertListEqual(ops.get_collection(my_collection_name), [update_op]) def testBasic(self): - with self.test_session() as sess: + with self.cached_session() as sess: values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) _enqueue_vector(sess, values_queue, [0, 1]) @@ -209,7 +209,7 @@ class MeanTest(test.TestCase): self.assertAlmostEqual(1.65, sess.run(mean), 5) def testUpdateOpsReturnsCurrentValue(self): - with self.test_session() as sess: + with self.cached_session() as sess: values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) _enqueue_vector(sess, values_queue, [0, 1]) @@ -253,7 +253,7 @@ class MeanTest(test.TestCase): metrics.mean(values, weights=np.ones((3, 2, 4, 1))), metrics.mean(values, weights=np.ones((3, 2, 4, 1, 1))),) expected = np.mean(values) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() for mean_result in mean_results: mean, update_op = mean_result @@ -266,7 +266,7 @@ class MeanTest(test.TestCase): np.sum(np.multiply(weights, np.ones_like(values))) ) mean, update_op = metrics.mean(values, weights=weights) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() self.assertAlmostEqual(expected, update_op.eval(), places=5) self.assertAlmostEqual(expected, mean.eval(), places=5) 
@@ -330,7 +330,7 @@ class MeanTest(test.TestCase): # Dynamic shapes. with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): - with self.test_session(): + with self.cached_session(): _, update_op = metrics.mean(values_placeholder, invalid_weight) variables.local_variables_initializer().run() update_op.eval(feed_dict={values_placeholder: values}) @@ -359,7 +359,7 @@ class MeanTensorTest(test.TestCase): self.assertListEqual(ops.get_collection(my_collection_name), [update_op]) def testBasic(self): - with self.test_session() as sess: + with self.cached_session() as sess: values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) _enqueue_vector(sess, values_queue, [0, 1]) @@ -376,7 +376,7 @@ class MeanTensorTest(test.TestCase): self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(mean)) def testMultiDimensional(self): - with self.test_session() as sess: + with self.cached_session() as sess: values_queue = data_flow_ops.FIFOQueue( 2, dtypes=dtypes_lib.float32, shapes=(2, 2, 2)) _enqueue_vector( @@ -397,7 +397,7 @@ class MeanTensorTest(test.TestCase): self.assertAllClose([[[1, 2], [1, 2]], [[2, 3], [5, 6]]], sess.run(mean)) def testUpdateOpsReturnsCurrentValue(self): - with self.test_session() as sess: + with self.cached_session() as sess: values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) _enqueue_vector(sess, values_queue, [0, 1]) @@ -418,7 +418,7 @@ class MeanTensorTest(test.TestCase): self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(mean), 5) def testBinaryWeighted1d(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the values. 
values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) @@ -445,7 +445,7 @@ class MeanTensorTest(test.TestCase): self.assertAllClose([[3.25, 0.5]], sess.run(mean), 5) def testWeighted1d(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the values. values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) @@ -472,7 +472,7 @@ class MeanTensorTest(test.TestCase): self.assertAllClose([[0.8, 3.52]], sess.run(mean), 5) def testWeighted2d_1(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the values. values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) @@ -499,7 +499,7 @@ class MeanTensorTest(test.TestCase): self.assertAllClose([[-2.1, 0.5]], sess.run(mean), 5) def testWeighted2d_2(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the values. values_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 2)) @@ -575,7 +575,7 @@ class AccuracyTest(test.TestCase): (10, 3), maxval=3, dtype=dtypes_lib.int64, seed=1) accuracy, update_op = metrics.accuracy(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -588,7 +588,7 @@ class AccuracyTest(test.TestCase): self.assertEqual(initial_accuracy, accuracy.eval()) def testMultipleUpdates(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. 
preds_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 1)) @@ -618,7 +618,7 @@ class AccuracyTest(test.TestCase): def testEffectivelyEquivalentSizes(self): predictions = array_ops.ones((40, 1)) labels = array_ops.ones((40,)) - with self.test_session() as sess: + with self.cached_session() as sess: accuracy, update_op = metrics.accuracy(labels, predictions) sess.run(variables.local_variables_initializer()) @@ -628,7 +628,7 @@ class AccuracyTest(test.TestCase): def testEffectivelyEquivalentSizesWithScalarWeight(self): predictions = array_ops.ones((40, 1)) labels = array_ops.ones((40,)) - with self.test_session() as sess: + with self.cached_session() as sess: accuracy, update_op = metrics.accuracy(labels, predictions, weights=2.0) sess.run(variables.local_variables_initializer()) @@ -642,7 +642,7 @@ class AccuracyTest(test.TestCase): weights = array_ops.expand_dims(ops.convert_to_tensor([100, 1, 1]), 1) # shape 3, 1 - with self.test_session() as sess: + with self.cached_session() as sess: accuracy, update_op = metrics.accuracy(labels, predictions, weights) sess.run(variables.local_variables_initializer()) @@ -662,7 +662,7 @@ class AccuracyTest(test.TestCase): dtype=dtypes_lib.int32, name='weights') feed_dict = {weights_placeholder: weights} - with self.test_session() as sess: + with self.cached_session() as sess: accuracy, update_op = metrics.accuracy(labels, predictions, weights_placeholder) @@ -674,7 +674,7 @@ class AccuracyTest(test.TestCase): self.assertGreater(accuracy.eval(feed_dict=feed_dict), .95) def testMultipleUpdatesWithWeightedValues(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. 
preds_queue = data_flow_ops.FIFOQueue( 4, dtypes=dtypes_lib.float32, shapes=(1, 1)) @@ -746,7 +746,7 @@ class PrecisionTest(test.TestCase): (10, 3), maxval=1, dtype=dtypes_lib.int64, seed=1) precision, update_op = metrics.precision(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -765,7 +765,7 @@ class PrecisionTest(test.TestCase): labels = constant_op.constant(inputs) precision, update_op = metrics.precision(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(1, sess.run(update_op)) self.assertAlmostEqual(1, precision.eval()) @@ -778,7 +778,7 @@ class PrecisionTest(test.TestCase): constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype) precision, update_op = metrics.precision(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.5, update_op.eval()) self.assertAlmostEqual(0.5, precision.eval()) @@ -789,7 +789,7 @@ class PrecisionTest(test.TestCase): precision, update_op = metrics.precision( labels, predictions, weights=constant_op.constant([[2], [5]])) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() weighted_tp = 2.0 + 5.0 weighted_positives = (2.0 + 2.0) + (5.0 + 5.0) @@ -806,7 +806,7 @@ class PrecisionTest(test.TestCase): } precision, update_op = metrics.precision(labels, predictions, weights=2) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() weighted_tp = 2.0 + 2.0 weighted_positives = (2.0 + 2.0) + (2.0 + 2.0) @@ -826,7 +826,7 @@ class PrecisionTest(test.TestCase): precision, update_op = metrics.precision( labels, predictions, weights=constant_op.constant([[2], [5]])) - with self.test_session(): 
+ with self.cached_session(): variables.local_variables_initializer().run() weighted_tp = 2.0 + 5.0 weighted_positives = (2.0 + 2.0) + (5.0 + 5.0) @@ -844,7 +844,7 @@ class PrecisionTest(test.TestCase): predictions, weights=constant_op.constant([[1, 2, 3, 4], [4, 3, 2, 1]])) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() weighted_tp = 3.0 + 4.0 weighted_positives = (1.0 + 3.0) + (4.0 + 2.0) @@ -864,7 +864,7 @@ class PrecisionTest(test.TestCase): predictions, weights=constant_op.constant([[1, 2, 3, 4], [4, 3, 2, 1]])) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() weighted_tp = 3.0 + 4.0 weighted_positives = (1.0 + 3.0) + (4.0 + 2.0) @@ -881,7 +881,7 @@ class PrecisionTest(test.TestCase): labels = constant_op.constant(1 - inputs) precision, update_op = metrics.precision(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) sess.run(update_op) self.assertAlmostEqual(0, precision.eval()) @@ -891,7 +891,7 @@ class PrecisionTest(test.TestCase): labels = constant_op.constant([0, 0, 0, 0]) precision, update_op = metrics.precision(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) sess.run(update_op) self.assertEqual(0.0, precision.eval()) @@ -933,7 +933,7 @@ class RecallTest(test.TestCase): (10, 3), maxval=1, dtype=dtypes_lib.int64, seed=1) recall, update_op = metrics.recall(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. 
@@ -952,7 +952,7 @@ class RecallTest(test.TestCase): labels = constant_op.constant(np_inputs) recall, update_op = metrics.recall(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) sess.run(update_op) self.assertEqual(1, recall.eval()) @@ -965,7 +965,7 @@ class RecallTest(test.TestCase): constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype) recall, update_op = metrics.recall(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.5, update_op.eval()) self.assertAlmostEqual(0.5, recall.eval()) @@ -976,7 +976,7 @@ class RecallTest(test.TestCase): weights = constant_op.constant([[2], [5]]) recall, update_op = metrics.recall(labels, predictions, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) weighted_tp = 2.0 + 5.0 weighted_t = (2.0 + 2.0) + (5.0 + 5.0) @@ -990,7 +990,7 @@ class RecallTest(test.TestCase): weights = constant_op.constant([[1, 2, 3, 4], [4, 3, 2, 1]]) recall, update_op = metrics.recall(labels, predictions, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) weighted_tp = 3.0 + 1.0 weighted_t = (2.0 + 3.0) + (4.0 + 1.0) @@ -1005,7 +1005,7 @@ class RecallTest(test.TestCase): labels = constant_op.constant(1 - np_inputs) recall, update_op = metrics.recall(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) sess.run(update_op) self.assertEqual(0, recall.eval()) @@ -1015,7 +1015,7 @@ class RecallTest(test.TestCase): labels = array_ops.zeros((1, 4)) recall, update_op = metrics.recall(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: 
sess.run(variables.local_variables_initializer()) sess.run(update_op) self.assertEqual(0, recall.eval()) @@ -1055,7 +1055,7 @@ class AUCTest(test.TestCase): (10, 3), maxval=1, dtype=dtypes_lib.int64, seed=1) auc, update_op = metrics.auc(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -1073,7 +1073,7 @@ class AUCTest(test.TestCase): def allCorrectAsExpected(self, curve): inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) labels = constant_op.constant(inputs) auc, update_op = metrics.auc(labels, predictions, curve=curve) @@ -1084,7 +1084,7 @@ class AUCTest(test.TestCase): self.assertEqual(1, auc.eval()) def testSomeCorrect_multipleLabelDtypes(self): - with self.test_session() as sess: + with self.cached_session() as sess: for label_dtype in ( dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): predictions = constant_op.constant( @@ -1099,7 +1099,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(0.5, auc.eval()) def testWeighted1d(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) @@ -1112,7 +1112,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(0.5, auc.eval(), 5) def testWeighted2d(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) @@ -1127,7 +1127,7 @@ class AUCTest(test.TestCase): # Regarding the AUC-PR tests: note that the preferred method when # calculating AUC-PR is summation_method='careful_interpolation'. 
def testCorrectAUCPRSpecialCase(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [0.1, 0.4, 0.35, 0.8], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 0, 1, 1], shape=(1, 4)) @@ -1141,7 +1141,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(expected, auc.eval(), delta=1e-3) def testCorrectAnotherAUCPRSpecialCase(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], shape=(1, 7), @@ -1157,7 +1157,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(expected, auc.eval(), delta=1e-3) def testThirdCorrectAUCPRSpecialCase(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], shape=(1, 7), @@ -1173,7 +1173,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(expected, auc.eval(), delta=1e-3) def testIncorrectAUCPRSpecialCase(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [0.1, 0.4, 0.35, 0.8], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 0, 1, 1], shape=(1, 4)) @@ -1186,7 +1186,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) def testAnotherIncorrectAUCPRSpecialCase(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], shape=(1, 7), @@ -1201,7 +1201,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) def testThirdIncorrectAUCPRSpecialCase(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], shape=(1, 7), @@ -1218,7 +1218,7 @@ class AUCTest(test.TestCase): def 
testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32) auc, update_op = metrics.auc(labels, predictions) @@ -1229,7 +1229,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(0, auc.eval()) def testZeroTruePositivesAndFalseNegativesGivesOneAUC(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = array_ops.zeros([4], dtype=dtypes_lib.float32) labels = array_ops.zeros([4]) auc, update_op = metrics.auc(labels, predictions) @@ -1240,7 +1240,7 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(1, auc.eval(), 6) def testRecallOneAndPrecisionOneGivesOnePRAUC(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = array_ops.ones([4], dtype=dtypes_lib.float32) labels = array_ops.ones([4]) auc, update_op = metrics.auc(labels, predictions, curve='PR') @@ -1301,7 +1301,7 @@ class AUCTest(test.TestCase): scale=1.0, size=num_samples)): expected_auc = self.np_auc(predictions, labels, weights) - with self.test_session() as sess: + with self.cached_session() as sess: enqueue_ops = [[] for i in range(num_batches)] tf_predictions = _enqueue_as_batches(predictions, enqueue_ops) tf_labels = _enqueue_as_batches(labels, enqueue_ops) @@ -1370,7 +1370,7 @@ class SpecificityAtSensitivityTest(test.TestCase): specificity, update_op = metrics.specificity_at_sensitivity( labels, predictions, sensitivity=0.7) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. 
@@ -1390,7 +1390,7 @@ class SpecificityAtSensitivityTest(test.TestCase): specificity, update_op = metrics.specificity_at_sensitivity( labels, predictions, sensitivity=0.7) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(1, sess.run(update_op)) self.assertEqual(1, specificity.eval()) @@ -1405,7 +1405,7 @@ class SpecificityAtSensitivityTest(test.TestCase): specificity, update_op = metrics.specificity_at_sensitivity( labels, predictions, sensitivity=0.8) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(1.0, sess.run(update_op)) self.assertAlmostEqual(1.0, specificity.eval()) @@ -1420,7 +1420,7 @@ class SpecificityAtSensitivityTest(test.TestCase): specificity, update_op = metrics.specificity_at_sensitivity( labels, predictions, sensitivity=0.4) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.6, sess.run(update_op)) @@ -1439,7 +1439,7 @@ class SpecificityAtSensitivityTest(test.TestCase): specificity, update_op = metrics.specificity_at_sensitivity( labels, predictions, weights=weights, sensitivity=0.4) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.6, sess.run(update_op)) @@ -1457,7 +1457,7 @@ class SpecificityAtSensitivityTest(test.TestCase): specificity, update_op = metrics.specificity_at_sensitivity( labels, predictions, weights=weights, sensitivity=0.4) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(8.0 / 15.0, sess.run(update_op)) @@ -1507,7 +1507,7 @@ class SensitivityAtSpecificityTest(test.TestCase): sensitivity, update_op = metrics.sensitivity_at_specificity( labels, 
predictions, specificity=0.7) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -1527,7 +1527,7 @@ class SensitivityAtSpecificityTest(test.TestCase): specificity, update_op = metrics.sensitivity_at_specificity( labels, predictions, specificity=0.7) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(1, sess.run(update_op)) self.assertEqual(1, specificity.eval()) @@ -1542,7 +1542,7 @@ class SensitivityAtSpecificityTest(test.TestCase): specificity, update_op = metrics.sensitivity_at_specificity( labels, predictions, specificity=0.8) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.8, sess.run(update_op)) self.assertAlmostEqual(0.8, specificity.eval()) @@ -1557,7 +1557,7 @@ class SensitivityAtSpecificityTest(test.TestCase): specificity, update_op = metrics.sensitivity_at_specificity( labels, predictions, specificity=0.4) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.6, sess.run(update_op)) self.assertAlmostEqual(0.6, specificity.eval()) @@ -1576,7 +1576,7 @@ class SensitivityAtSpecificityTest(test.TestCase): specificity, update_op = metrics.sensitivity_at_specificity( labels, predictions, weights=weights, specificity=0.4) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(0.675, sess.run(update_op)) self.assertAlmostEqual(0.675, specificity.eval()) @@ -1638,7 +1638,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): thresholds) rec, rec_op = metrics.recall_at_thresholds(labels, predictions, thresholds) - with self.test_session() as sess: + with self.cached_session() as sess: 
sess.run(variables.local_variables_initializer()) # Run several updates, then verify idempotency. @@ -1654,7 +1654,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): def testAllCorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) labels = constant_op.constant(inputs) thresholds = [0.5] @@ -1670,7 +1670,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): self.assertEqual(1, rec.eval()) def testSomeCorrect_multipleLabelDtypes(self): - with self.test_session() as sess: + with self.cached_session() as sess: for label_dtype in ( dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): predictions = constant_op.constant( @@ -1692,7 +1692,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): def testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32) labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32) thresholds = [0.5] @@ -1708,7 +1708,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): self.assertAlmostEqual(0, rec.eval()) def testWeights1d(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes_lib.float32) labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2)) @@ -1738,7 +1738,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): self.assertAlmostEqual(0.0, rec_high.eval(), places=5) def testWeights2d(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes_lib.float32) labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2)) @@ -1768,7 +1768,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): 
self.assertAlmostEqual(0.0, rec_high.eval(), places=5) def testExtremeThresholds(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant([0, 1, 1, 1], shape=(1, 4)) @@ -1792,7 +1792,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): self.assertAlmostEqual(0.0, rec_high.eval()) def testZeroLabelsPredictions(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = array_ops.zeros([4], dtype=dtypes_lib.float32) labels = array_ops.zeros([4]) thresholds = [0.5] @@ -1842,7 +1842,7 @@ class PrecisionRecallThresholdsTest(test.TestCase): labels = labels.astype(np.float32) predictions = predictions.astype(np.float32) - with self.test_session() as sess: + with self.cached_session() as sess: # Reshape the data so its easy to queue up: predictions_batches = predictions.reshape((batch_size, num_batches)) labels_batches = labels.reshape((batch_size, num_batches)) @@ -2801,7 +2801,7 @@ class MeanAbsoluteErrorTest(test.TestCase): labels = random_ops.random_normal((10, 3), seed=2) error, update_op = metrics.mean_absolute_error(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. 
@@ -2822,7 +2822,7 @@ class MeanAbsoluteErrorTest(test.TestCase): error, update_op = metrics.mean_absolute_error(labels, predictions, weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(3, sess.run(update_op)) self.assertEqual(3, error.eval()) @@ -2866,7 +2866,7 @@ class MeanRelativeErrorTest(test.TestCase): error, update_op = metrics.mean_relative_error(labels, predictions, normalizer) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -2891,7 +2891,7 @@ class MeanRelativeErrorTest(test.TestCase): error, update_op = metrics.mean_relative_error( labels, predictions, normalizer=labels) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(expected_error, sess.run(update_op)) self.assertEqual(expected_error, error.eval()) @@ -2907,7 +2907,7 @@ class MeanRelativeErrorTest(test.TestCase): error, update_op = metrics.mean_relative_error( labels, predictions, normalizer=array_ops.zeros_like(labels)) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(0.0, sess.run(update_op)) self.assertEqual(0.0, error.eval()) @@ -2945,7 +2945,7 @@ class MeanSquaredErrorTest(test.TestCase): labels = random_ops.random_normal((10, 3), seed=2) error, update_op = metrics.mean_squared_error(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. 
@@ -2963,7 +2963,7 @@ class MeanSquaredErrorTest(test.TestCase): error, update_op = metrics.mean_squared_error(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(0, sess.run(update_op)) self.assertEqual(0, error.eval()) @@ -2976,7 +2976,7 @@ class MeanSquaredErrorTest(test.TestCase): error, update_op = metrics.mean_squared_error(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(6, sess.run(update_op)) self.assertEqual(6, error.eval()) @@ -2990,13 +2990,13 @@ class MeanSquaredErrorTest(test.TestCase): error, update_op = metrics.mean_squared_error(labels, predictions, weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(13, sess.run(update_op)) self.assertEqual(13, error.eval()) def testMultipleBatchesOfSizeOne(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. preds_queue = data_flow_ops.FIFOQueue( 2, dtypes=dtypes_lib.float32, shapes=(1, 3)) @@ -3020,7 +3020,7 @@ class MeanSquaredErrorTest(test.TestCase): self.assertAlmostEqual(208.0 / 6, error.eval(), 5) def testMetricsComputedConcurrently(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates one set of predictions. preds_queue0 = data_flow_ops.FIFOQueue( 2, dtypes=dtypes_lib.float32, shapes=(1, 3)) @@ -3063,7 +3063,7 @@ class MeanSquaredErrorTest(test.TestCase): self.assertAlmostEqual(79.0 / 6, mse1, 5) def testMultipleMetricsOnMultipleBatchesOfSizeOne(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. 
preds_queue = data_flow_ops.FIFOQueue( 2, dtypes=dtypes_lib.float32, shapes=(1, 3)) @@ -3122,7 +3122,7 @@ class RootMeanSquaredErrorTest(test.TestCase): labels = random_ops.random_normal((10, 3), seed=2) error, update_op = metrics.root_mean_squared_error(labels, predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -3135,7 +3135,7 @@ class RootMeanSquaredErrorTest(test.TestCase): self.assertEqual(initial_error, error.eval()) def testSingleUpdateZeroError(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( 0.0, shape=(1, 3), dtype=dtypes_lib.float32) labels = constant_op.constant(0.0, shape=(1, 3), dtype=dtypes_lib.float32) @@ -3148,7 +3148,7 @@ class RootMeanSquaredErrorTest(test.TestCase): self.assertEqual(0, rmse.eval()) def testSingleUpdateWithError(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [2, 4, 6], shape=(1, 3), dtype=dtypes_lib.float32) labels = constant_op.constant( @@ -3161,7 +3161,7 @@ class RootMeanSquaredErrorTest(test.TestCase): self.assertAlmostEqual(math.sqrt(6), rmse.eval(), 5) def testSingleUpdateWithErrorAndWeights(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [2, 4, 6, 8], shape=(1, 4), dtype=dtypes_lib.float32) labels = constant_op.constant( @@ -3220,7 +3220,7 @@ class MeanCosineDistanceTest(test.TestCase): labels = random_ops.random_normal((10, 3), seed=2) error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=1) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. 
@@ -3242,7 +3242,7 @@ class MeanCosineDistanceTest(test.TestCase): error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(0, sess.run(update_op)) self.assertEqual(0, error.eval()) @@ -3258,7 +3258,7 @@ class MeanCosineDistanceTest(test.TestCase): error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(1, sess.run(update_op), 5) self.assertAlmostEqual(1, error.eval(), 5) @@ -3279,7 +3279,7 @@ class MeanCosineDistanceTest(test.TestCase): np_labels, shape=(3, 1, 3), dtype=dtypes_lib.float32) error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAlmostEqual(1.0, sess.run(update_op), 5) self.assertAlmostEqual(1.0, error.eval(), 5) @@ -3298,7 +3298,7 @@ class MeanCosineDistanceTest(test.TestCase): error, update_op = metrics.mean_cosine_distance( labels, predictions, dim=2, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(0, sess.run(update_op)) self.assertEqual(0, error.eval()) @@ -3317,7 +3317,7 @@ class MeanCosineDistanceTest(test.TestCase): error, update_op = metrics.mean_cosine_distance( labels, predictions, dim=2, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertEqual(1.5, update_op.eval()) self.assertEqual(1.5, error.eval()) @@ -3352,7 +3352,7 @@ class PcntBelowThreshTest(test.TestCase): self.assertListEqual(ops.get_collection(my_collection_name), [update_op]) def 
testOneUpdate(self): - with self.test_session() as sess: + with self.cached_session() as sess: values = constant_op.constant( [2, 4, 6, 8], shape=(1, 4), dtype=dtypes_lib.float32) @@ -3369,7 +3369,7 @@ class PcntBelowThreshTest(test.TestCase): self.assertAlmostEqual(0.0, pcnt2, 5) def testSomePresentOneUpdate(self): - with self.test_session() as sess: + with self.cached_session() as sess: values = constant_op.constant( [2, 4, 6, 8], shape=(1, 4), dtype=dtypes_lib.float32) weights = constant_op.constant( @@ -3445,7 +3445,7 @@ class MeanIOUTest(test.TestCase): mean_iou, update_op = metrics.mean_iou( labels, predictions, num_classes=num_classes) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -3459,7 +3459,7 @@ class MeanIOUTest(test.TestCase): def testMultipleUpdates(self): num_classes = 3 - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. preds_queue = data_flow_ops.FIFOQueue( 5, dtypes=dtypes_lib.int32, shapes=(1, 1)) @@ -3490,7 +3490,7 @@ class MeanIOUTest(test.TestCase): def testMultipleUpdatesWithWeights(self): num_classes = 2 - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. preds_queue = data_flow_ops.FIFOQueue( 6, dtypes=dtypes_lib.int32, shapes=(1, 1)) @@ -3538,7 +3538,7 @@ class MeanIOUTest(test.TestCase): # one class, and thus there is one row and one column with # zero entries in the confusion matrix. num_classes = 3 - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. # There is no prediction for class 2. 
preds_queue = data_flow_ops.FIFOQueue( @@ -3585,7 +3585,7 @@ class MeanIOUTest(test.TestCase): ], 0) num_classes = 2 - with self.test_session() as sess: + with self.cached_session() as sess: miou, update_op = metrics.mean_iou(labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) confusion_matrix = update_op.eval() @@ -3597,7 +3597,7 @@ class MeanIOUTest(test.TestCase): predictions = array_ops.zeros([40]) labels = array_ops.zeros([40]) num_classes = 1 - with self.test_session() as sess: + with self.cached_session() as sess: miou, update_op = metrics.mean_iou(labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) self.assertEqual(40, update_op.eval()[0]) @@ -3607,7 +3607,7 @@ class MeanIOUTest(test.TestCase): predictions = array_ops.zeros([40]) labels = array_ops.ones([40]) num_classes = 2 - with self.test_session() as sess: + with self.cached_session() as sess: miou, update_op = metrics.mean_iou(labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) self.assertAllEqual([[0, 0], [40, 0]], update_op.eval()) @@ -3637,7 +3637,7 @@ class MeanIOUTest(test.TestCase): 0, shape=[1]) ], 0) - with self.test_session() as sess: + with self.cached_session() as sess: miou, update_op = metrics.mean_iou( labels, predictions, num_classes, weights=weights) sess.run(variables.local_variables_initializer()) @@ -3657,7 +3657,7 @@ class MeanIOUTest(test.TestCase): [[0, 0, 2, 1, 1, 1], [1, 1, 2, 0, 0, 0]]]) num_classes = 3 - with self.test_session() as sess: + with self.cached_session() as sess: miou, update_op = metrics.mean_iou(labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) self.assertAllEqual([[7, 4, 3], [3, 5, 2], [0, 0, 0]], update_op.eval()) @@ -3669,7 +3669,7 @@ class MeanIOUTest(test.TestCase): labels = constant_op.constant([0]) predictions = constant_op.constant([0]) num_classes = 2 - with self.test_session() as sess: + with self.cached_session() as sess: 
miou, update_op = metrics.mean_iou(labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) self.assertAllEqual([[1, 0], [0, 0]], update_op.eval()) @@ -3687,7 +3687,7 @@ class MeanIOUTest(test.TestCase): [[0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0]]]) num_classes = 3 - with self.test_session() as sess: + with self.cached_session() as sess: miou, update_op = metrics.mean_iou(labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) self.assertAllEqual([[9, 5, 0], [3, 7, 0], [0, 0, 0]], update_op.eval()) @@ -3751,7 +3751,7 @@ class MeanPerClassAccuracyTest(test.TestCase): mean_accuracy, update_op = metrics.mean_per_class_accuracy( labels, predictions, num_classes=num_classes) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. @@ -3764,7 +3764,7 @@ class MeanPerClassAccuracyTest(test.TestCase): self.assertEqual(initial_mean_accuracy, mean_accuracy.eval()) num_classes = 3 - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. preds_queue = data_flow_ops.FIFOQueue( 5, dtypes=dtypes_lib.int32, shapes=(1, 1)) @@ -3796,7 +3796,7 @@ class MeanPerClassAccuracyTest(test.TestCase): def testMultipleUpdatesWithWeights(self): num_classes = 2 - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. preds_queue = data_flow_ops.FIFOQueue( 6, dtypes=dtypes_lib.int32, shapes=(1, 1)) @@ -3844,7 +3844,7 @@ class MeanPerClassAccuracyTest(test.TestCase): # one class, and thus there is one row and one column with # zero entries in the confusion matrix. num_classes = 3 - with self.test_session() as sess: + with self.cached_session() as sess: # Create the queue that populates the predictions. # There is no prediction for class 2. 
preds_queue = data_flow_ops.FIFOQueue( @@ -3880,7 +3880,7 @@ class MeanPerClassAccuracyTest(test.TestCase): predictions = array_ops.zeros([40]) labels = array_ops.zeros([40]) num_classes = 1 - with self.test_session() as sess: + with self.cached_session() as sess: mean_accuracy, update_op = metrics.mean_per_class_accuracy( labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) @@ -3891,7 +3891,7 @@ class MeanPerClassAccuracyTest(test.TestCase): predictions = array_ops.zeros([40]) labels = array_ops.ones([40]) num_classes = 2 - with self.test_session() as sess: + with self.cached_session() as sess: mean_accuracy, update_op = metrics.mean_per_class_accuracy( labels, predictions, num_classes) sess.run(variables.local_variables_initializer()) @@ -3910,7 +3910,7 @@ class MeanPerClassAccuracyTest(test.TestCase): constant_op.constant(0, shape=[1]), constant_op.constant(1, shape=[8]), constant_op.constant(0, shape=[1]) ], 0) - with self.test_session() as sess: + with self.cached_session() as sess: mean_accuracy, update_op = metrics.mean_per_class_accuracy( labels, predictions, num_classes, weights=weights) sess.run(variables.local_variables_initializer()) @@ -3944,7 +3944,7 @@ class FalseNegativesTest(test.TestCase): tn, tn_update_op = metrics.false_negatives( labels=labels, predictions=predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(3., tn_update_op.eval()) @@ -3963,7 +3963,7 @@ class FalseNegativesTest(test.TestCase): tn, tn_update_op = metrics.false_negatives( labels=labels, predictions=predictions, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(5., tn_update_op.eval()) @@ -3993,7 +3993,7 @@ class FalseNegativesAtThresholdsTest(test.TestCase): fn, 
fn_update_op = metrics.false_negatives_at_thresholds( predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0, 0, 0), fn.eval()) self.assertAllEqual((0, 2, 3), fn_update_op.eval()) @@ -4012,7 +4012,7 @@ class FalseNegativesAtThresholdsTest(test.TestCase): weights=((3.0,), (5.0,), (7.0,)), thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0.0, 0.0, 0.0), fn.eval()) self.assertAllEqual((0.0, 8.0, 11.0), fn_update_op.eval()) @@ -4043,7 +4043,7 @@ class FalsePositivesTest(test.TestCase): tn, tn_update_op = metrics.false_positives( labels=labels, predictions=predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(7., tn_update_op.eval()) @@ -4062,7 +4062,7 @@ class FalsePositivesTest(test.TestCase): tn, tn_update_op = metrics.false_positives( labels=labels, predictions=predictions, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(14., tn_update_op.eval()) @@ -4092,7 +4092,7 @@ class FalsePositivesAtThresholdsTest(test.TestCase): fp, fp_update_op = metrics.false_positives_at_thresholds( predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0, 0, 0), fp.eval()) self.assertAllEqual((7, 4, 2), fp_update_op.eval()) @@ -4113,7 +4113,7 @@ class FalsePositivesAtThresholdsTest(test.TestCase): (19.0, 23.0, 29.0, 31.0)), thresholds=[0.15, 0.5, 0.85]) - with 
self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0.0, 0.0, 0.0), fp.eval()) self.assertAllEqual((125.0, 42.0, 12.0), fp_update_op.eval()) @@ -4144,7 +4144,7 @@ class TrueNegativesTest(test.TestCase): tn, tn_update_op = metrics.true_negatives( labels=labels, predictions=predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(3., tn_update_op.eval()) @@ -4163,7 +4163,7 @@ class TrueNegativesTest(test.TestCase): tn, tn_update_op = metrics.true_negatives( labels=labels, predictions=predictions, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(4., tn_update_op.eval()) @@ -4193,7 +4193,7 @@ class TrueNegativesAtThresholdsTest(test.TestCase): tn, tn_update_op = metrics.true_negatives_at_thresholds( predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0, 0, 0), tn.eval()) self.assertAllEqual((2, 5, 7), tn_update_op.eval()) @@ -4212,7 +4212,7 @@ class TrueNegativesAtThresholdsTest(test.TestCase): weights=((0.0, 2.0, 3.0, 5.0),), thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0.0, 0.0, 0.0), tn.eval()) self.assertAllEqual((5.0, 15.0, 23.0), tn_update_op.eval()) @@ -4243,7 +4243,7 @@ class TruePositivesTest(test.TestCase): tn, tn_update_op = metrics.true_positives( labels=labels, predictions=predictions) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) 
self.assertAllClose(0., tn.eval()) self.assertAllClose(7., tn_update_op.eval()) @@ -4262,7 +4262,7 @@ class TruePositivesTest(test.TestCase): tn, tn_update_op = metrics.true_positives( labels=labels, predictions=predictions, weights=weights) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllClose(0., tn.eval()) self.assertAllClose(12., tn_update_op.eval()) @@ -4292,7 +4292,7 @@ class TruePositivesAtThresholdsTest(test.TestCase): tp, tp_update_op = metrics.true_positives_at_thresholds( predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0, 0, 0), tp.eval()) self.assertAllEqual((3, 1, 0), tp_update_op.eval()) @@ -4309,7 +4309,7 @@ class TruePositivesAtThresholdsTest(test.TestCase): predictions=predictions, labels=labels, weights=37.0, thresholds=[0.15, 0.5, 0.85]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) self.assertAllEqual((0.0, 0.0, 0.0), tp.eval()) self.assertAllEqual((111.0, 37.0, 0.0), tp_update_op.eval()) diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py index 944de217a1..e415d7879e 100644 --- a/tensorflow/python/kernel_tests/pad_op_test.py +++ b/tensorflow/python/kernel_tests/pad_op_test.py @@ -188,7 +188,7 @@ class PadOpTest(test.TestCase): mode="SYMMETRIC").eval() def testInvalid(self): - with self.test_session(): + with self.cached_session(): x = [[1, 2, 3], [4, 5, 6]] with self.assertRaisesRegexp(ValueError, "Unknown padding mode"): array_ops.pad(x, [[1, 0], [2, 1]], mode="weird").eval() diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py index d8c3f9823c..95f3dcceea 100644 --- 
a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py @@ -95,13 +95,13 @@ class PaddingFIFOQueueTest(test.TestCase): """, q.queue_ref.op.node_def) def testEnqueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) enqueue_op = q.enqueue((10.0,)) enqueue_op.run() def testEnqueueWithShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue( 10, dtypes_lib.float32, shapes=((3, 2),)) enqueue_correct_op = q.enqueue(([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],)) @@ -111,14 +111,14 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(1, q.size().eval()) def testEnqueueManyWithShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue( 10, [dtypes_lib.int32, dtypes_lib.int32], shapes=[(), (2,)]) q.enqueue_many([[1, 2, 3, 4], [[1, 1], [2, 2], [3, 3], [4, 4]]]).run() self.assertEqual(4, q.size().eval()) def testParallelEnqueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -144,7 +144,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, results) def testParallelDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -168,7 +168,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, results) def testDequeue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 
30.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -182,7 +182,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([elems[i]], vals) def testEnqueueAndBlockingDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(3, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0] enqueue_ops = [q.enqueue((x,)) for x in elems] @@ -212,7 +212,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([elem], result) def testMultiEnqueueAndDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), ((), ())) @@ -230,12 +230,12 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([y], y_val) def testQueueSizeEmpty(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) self.assertEqual([0], q.size().eval()) def testQueueSizeAfterEnqueueAndDequeue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) enqueue_op = q.enqueue((10.0,)) dequeued_t = q.dequeue() @@ -248,7 +248,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(0, size.eval()) def testEnqueueMany(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -261,7 +261,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([elems[i % 4]], vals) def testEmptyEnqueueMany(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ( (None, None),)) empty_t = constant_op.constant( @@ -274,7 +274,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([0], size_t.eval()) def testEmptyDequeueMany(self): - with self.test_session(): + 
with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, shapes=((),)) enqueue_op = q.enqueue((10.0,)) dequeued_t = q.dequeue_many(0) @@ -284,7 +284,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([], dequeued_t.eval().tolist()) def testEmptyDequeueManyWithDynamicShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue( 10, dtypes_lib.float32, shapes=((None,),)) enqueue_op = q.enqueue(([10.0],)) @@ -295,7 +295,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([], dequeued_t.eval().tolist()) def testEmptyDequeueUpToWithDynamicShape(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue( 10, dtypes_lib.float32, shapes=((None,),)) enqueue_op = q.enqueue(([10.0],)) @@ -306,7 +306,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([], dequeued_t.eval().tolist()) def testConstructPaddingFIFOQueueWithNoShape(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp( ValueError, r"When providing partial shapes, a list of shapes must be provided."): @@ -314,7 +314,7 @@ class PaddingFIFOQueueTest(test.TestCase): None).queue_ref.eval() def testMultiEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, (dtypes_lib.float32, dtypes_lib.int32), ((), (2,))) @@ -332,7 +332,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(int_elems[i % 4], int_val) def testMultiEnqueueManyWithPartiallyKnownShapes(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue( 10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (None,))) float_elems = [10.0, 20.0, 30.0, 40.0] @@ -349,7 +349,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(int_elems[i % 4], int_val) def testDequeueMany(self): - with self.test_session(): + with 
self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_op = q.enqueue_many((elems,)) @@ -361,7 +361,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(elems[4:8], dequeued_t.eval()) def testDequeueUpToNoBlocking(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] enqueue_op = q.enqueue_many((elems,)) @@ -373,7 +373,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(elems[4:8], dequeued_t.eval()) def testMultiDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue( 10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (2,))) float_elems = [ @@ -404,7 +404,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape()) def testMultiDequeueManyWithPartiallyKnownShapes(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue( 10, (dtypes_lib.float32, dtypes_lib.int32), shapes=((), (None,))) float_elems = [ @@ -443,7 +443,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeued_single_t[1].get_shape())) def testMultiDequeueManyWithPartiallyKnownShapesAndVariableSizeInput(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue( 10, (dtypes_lib.string, dtypes_lib.int32), shapes=((None,), (1, None))) @@ -484,7 +484,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeued_single_t[1].get_shape())) def testMultiDequeueUpToPartiallyKnownShapesAndVariableInputNoBlocking(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue( 10, (dtypes_lib.string, dtypes_lib.int32), shapes=((None,), (1, 
None))) @@ -525,7 +525,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeued_single_t[1].get_shape())) def testHighDimension(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, ((4, 4, 4, 4),)) elems = np.array([[[[[x] * 4] * 4] * 4] * 4 for x in range(10)], np.int32) enqueue_op = q.enqueue_many((elems,)) @@ -535,7 +535,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(dequeued_t.eval(), elems) def testPartiallyKnownHighDimension(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, ( (4, None, 4, None),)) elems = np.array([[[[[x] * 4] * 4] * 4] * 4 for x in range(10)], np.int32) @@ -592,7 +592,7 @@ class PaddingFIFOQueueTest(test.TestCase): array_ops.placeholder(dtypes_lib.int32))) def testEnqueueWrongPartiallyKnownShapeAtRuntime(self): - with self.test_session() as sess: + with self.cached_session() as sess: # First dimension of second component is unknown, second # dimension must be 3. q = data_flow_ops.PaddingFIFOQueue(10, @@ -607,7 +607,7 @@ class PaddingFIFOQueueTest(test.TestCase): feed_dict={elems_bad: np.array([1] * 12).reshape((3, 4))}) def testEnqueueDequeueManyWrongPartiallyKnownShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: # First dimension of second component is unknown, second # dimension must be 3. 
q = data_flow_ops.PaddingFIFOQueue(10, @@ -625,7 +625,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeued_t.eval() def testParallelEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(1000, dtypes_lib.float32, shapes=((),)) elems = [10.0 * x for x in range(100)] enqueue_op = q.enqueue_many((elems,)) @@ -644,7 +644,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertItemsEqual(dequeued_t.eval(), elems * 10) def testParallelDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(1000, dtypes_lib.float32, shapes=((),)) elems = [10.0 * x for x in range(1000)] enqueue_op = q.enqueue_many((elems,)) @@ -666,7 +666,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, dequeued_elems) def testParallelDequeueUpTo(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(1000, dtypes_lib.float32, shapes=((),)) elems = [10.0 * x for x in range(1000)] enqueue_op = q.enqueue_many((elems,)) @@ -690,7 +690,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertItemsEqual(elems, dequeued_elems) def testParallelEnqueueAndDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(50, dtypes_lib.float32, shapes=((),)) initial_elements = [10.0] * 49 q.enqueue_many((initial_elements,)).run() @@ -723,7 +723,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertTrue(elem in (10.0, 20.0)) def testMixtureOfEnqueueAndEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, shapes=((),)) enqueue_placeholder = array_ops.placeholder(dtypes_lib.int32, shape=()) enqueue_op = q.enqueue((enqueue_placeholder,)) @@ -759,7 +759,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(0, 
q.size().eval()) def testMixtureOfDequeueAndDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.int32, shapes=((),)) enqueue_op = q.enqueue_many((np.arange(250, dtype=np.int32),)) dequeued_t = q.dequeue() @@ -793,7 +793,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testBlockingDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -820,7 +820,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(elems, dequeued_elems) def testBlockingDequeueUpTo(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -847,7 +847,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(elems, dequeued_elems) def testDequeueManyWithTensorParameter(self): - with self.test_session(): + with self.cached_session(): # Define a first queue that contains integer counts. 
dequeue_counts = [random.randint(1, 10) for _ in range(100)] count_q = data_flow_ops.PaddingFIFOQueue(100, dtypes_lib.int32, ((),)) @@ -872,7 +872,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(elems, dequeued_elems) def testDequeueFromClosedQueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -890,7 +890,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeued_t.eval() def testBlockingDequeueFromClosedQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -916,7 +916,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testDequeueUpToFromClosedQueueReturnsRemainder(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -938,7 +938,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueFromClosedEmptyQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) close_op = q.close() dequeued_t = q.dequeue() @@ -958,7 +958,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueManyFromClosedQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -983,7 +983,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueManyButNotAllFromClosedQueue(self): - with self.test_session() 
as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1008,7 +1008,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testEnqueueManyLargerThanCapacityWithConcurrentDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1045,7 +1045,7 @@ class PaddingFIFOQueueTest(test.TestCase): close_thread.join() def testClosedBlockingDequeueManyRestoresPartialBatch(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, (dtypes_lib.float32, dtypes_lib.float32), ((), ())) elems_a = [1.0, 2.0, 3.0] @@ -1078,7 +1078,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testBlockingDequeueManyFromClosedEmptyQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) close_op = q.close() dequeued_t = q.dequeue_many(4) @@ -1098,7 +1098,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testBlockingDequeueUpToFromClosedEmptyQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) close_op = q.close() dequeued_t = q.dequeue_up_to(4) @@ -1118,7 +1118,7 @@ class PaddingFIFOQueueTest(test.TestCase): dequeue_thread.join() def testEnqueueToClosedQueue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) enqueue_op = q.enqueue((10.0,)) close_op = q.close() @@ -1131,7 +1131,7 @@ class PaddingFIFOQueueTest(test.TestCase): enqueue_op.run() def testEnqueueManyToClosedQueue(self): - with 
self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1145,7 +1145,7 @@ class PaddingFIFOQueueTest(test.TestCase): enqueue_op.run() def testBlockingEnqueueToFullQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1168,7 +1168,7 @@ class PaddingFIFOQueueTest(test.TestCase): thread.join() def testBlockingEnqueueManyToFullQueue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1195,7 +1195,7 @@ class PaddingFIFOQueueTest(test.TestCase): thread.join() def testBlockingEnqueueBeforeClose(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0, 40.0] enqueue_op = q.enqueue_many((elems,)) @@ -1232,7 +1232,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(0, q.size().eval()) def testBlockingEnqueueManyBeforeClose(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) elems = [10.0, 20.0, 30.0] enqueue_op = q.enqueue_many((elems,)) @@ -1265,7 +1265,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(elem, dequeued_t.eval()) def testDoesNotLoseValue(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PaddingFIFOQueue(1, dtypes_lib.float32, ((),)) enqueue_op = q.enqueue((10.0,)) size_t = q.size() @@ -1275,7 +1275,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(size_t.eval(), [1]) def testSharedQueueSameSession(self): - with 
self.test_session(): + with self.cached_session(): q1 = data_flow_ops.PaddingFIFOQueue( 1, dtypes_lib.float32, ((),), shared_name="shared_queue") q1.enqueue((10.0,)).run() @@ -1305,7 +1305,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(q2_size_t.eval(), [0]) def testIncompatibleSharedQueueErrors(self): - with self.test_session(): + with self.cached_session(): q_a_1 = data_flow_ops.PaddingFIFOQueue( 10, dtypes_lib.float32, ((),), shared_name="q_a") q_a_2 = data_flow_ops.PaddingFIFOQueue( @@ -1356,7 +1356,7 @@ class PaddingFIFOQueueTest(test.TestCase): q_f_2.queue_ref.op.run() def testSelectQueue(self): - with self.test_session(): + with self.cached_session(): num_queues = 10 qlist = list() for _ in xrange(num_queues): @@ -1370,7 +1370,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual(q.dequeue().eval(), 10.0) def testSelectQueueOutOfRange(self): - with self.test_session(): + with self.cached_session(): q1 = data_flow_ops.PaddingFIFOQueue(10, dtypes_lib.float32, ((),)) q2 = data_flow_ops.PaddingFIFOQueue(15, dtypes_lib.float32, ((),)) enq_q = data_flow_ops.PaddingFIFOQueue.from_list(3, [q1, q2]) @@ -1394,7 +1394,7 @@ class PaddingFIFOQueueTest(test.TestCase): sess.run(enqueue_many_op) def testResetOfBlockingOperation(self): - with self.test_session() as sess: + with self.cached_session() as sess: q_empty = data_flow_ops.PaddingFIFOQueue(5, dtypes_lib.float32, ((),)) dequeue_op = q_empty.dequeue() dequeue_many_op = q_empty.dequeue_many(1) @@ -1422,7 +1422,7 @@ class PaddingFIFOQueueTest(test.TestCase): t.join() def testBigEnqueueMany(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PaddingFIFOQueue(5, dtypes_lib.int32, ((),)) elem = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] enq = q.enqueue_many((elem,)) @@ -1467,7 +1467,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(elem, results) def testBigDequeueMany(self): - with self.test_session() as sess: + with self.cached_session() 
as sess: q = data_flow_ops.PaddingFIFOQueue(2, dtypes_lib.int32, ((),)) elem = np.arange(4, dtype=np.int32) enq_list = [q.enqueue((e,)) for e in elem] @@ -1493,7 +1493,7 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertAllEqual(elem, results) def testDtypes(self): - with self.test_session() as sess: + with self.cached_session() as sess: dtypes = [ dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int32, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.int8, dtypes_lib.int64, diff --git a/tensorflow/python/kernel_tests/parse_single_example_op_test.py b/tensorflow/python/kernel_tests/parse_single_example_op_test.py index bf4c89b368..a84895a287 100644 --- a/tensorflow/python/kernel_tests/parse_single_example_op_test.py +++ b/tensorflow/python/kernel_tests/parse_single_example_op_test.py @@ -89,7 +89,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors, class ParseExampleTest(test.TestCase): def _test(self, kwargs, expected_values=None, expected_err=None): - with self.test_session() as sess: + with self.cached_session() as sess: if expected_err: with self.assertRaisesWithPredicateMatch(expected_err[0], expected_err[1]): @@ -844,7 +844,7 @@ class ParseExampleTest(test.TestCase): class ParseSingleExampleTest(test.TestCase): def _test(self, kwargs, expected_values=None, expected_err=None): - with self.test_session() as sess: + with self.cached_session() as sess: if expected_err: with self.assertRaisesWithPredicateMatch(expected_err[0], expected_err[1]): diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py index 7dff4501cc..71d8b60d3c 100644 --- a/tensorflow/python/kernel_tests/parsing_ops_test.py +++ b/tensorflow/python/kernel_tests/parsing_ops_test.py @@ -89,7 +89,7 @@ def _compare_output_to_expected(tester, dict_tensors, expected_tensors, class ParseExampleTest(test.TestCase): def _test(self, kwargs, expected_values=None, expected_err=None): - with self.test_session() as sess: 
+ with self.cached_session() as sess: if expected_err: with self.assertRaisesWithPredicateMatch(expected_err[0], expected_err[1]): @@ -937,7 +937,7 @@ class ParseExampleTest(test.TestCase): class ParseSingleExampleTest(test.TestCase): def _test(self, kwargs, expected_values=None, expected_err=None): - with self.test_session() as sess: + with self.cached_session() as sess: if expected_err: with self.assertRaisesWithPredicateMatch(expected_err[0], expected_err[1]): @@ -1054,7 +1054,7 @@ class ParseSequenceExampleTest(test.TestCase): expected_feat_list_values = expected_feat_list_values or {} expected_length_values = expected_length_values or {} - with self.test_session() as sess: + with self.cached_session() as sess: if expected_err: with self.assertRaisesWithPredicateMatch(expected_err[0], expected_err[1]): @@ -1606,7 +1606,7 @@ class ParseSequenceExampleTest(test.TestCase): class DecodeJSONExampleTest(test.TestCase): def _testRoundTrip(self, examples): - with self.test_session() as sess: + with self.cached_session() as sess: examples = np.array(examples, dtype=np.object) json_tensor = constant_op.constant( @@ -1696,7 +1696,7 @@ class DecodeJSONExampleTest(test.TestCase): ]) def testInvalidSyntax(self): - with self.test_session() as sess: + with self.cached_session() as sess: json_tensor = constant_op.constant(["{]"]) binary_tensor = parsing_ops.decode_json_example(json_tensor) with self.assertRaisesOpError("Error while parsing JSON"): @@ -1706,7 +1706,7 @@ class DecodeJSONExampleTest(test.TestCase): class ParseTensorOpTest(test.TestCase): def testToFloat32(self): - with self.test_session(): + with self.cached_session(): expected = np.random.rand(3, 4, 5).astype(np.float32) tensor_proto = tensor_util.make_tensor_proto(expected) @@ -1719,7 +1719,7 @@ class ParseTensorOpTest(test.TestCase): self.assertAllEqual(expected, result) def testToUint8(self): - with self.test_session(): + with self.cached_session(): expected = np.random.rand(3, 4, 5).astype(np.uint8) 
tensor_proto = tensor_util.make_tensor_proto(expected) @@ -1732,7 +1732,7 @@ class ParseTensorOpTest(test.TestCase): self.assertAllEqual(expected, result) def testTypeMismatch(self): - with self.test_session(): + with self.cached_session(): expected = np.random.rand(3, 4, 5).astype(np.uint8) tensor_proto = tensor_util.make_tensor_proto(expected) @@ -1745,7 +1745,7 @@ class ParseTensorOpTest(test.TestCase): tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()}) def testInvalidInput(self): - with self.test_session(): + with self.cached_session(): serialized = array_ops.placeholder(dtypes.string) tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16) diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py index 15d5702252..b34d30f5c0 100644 --- a/tensorflow/python/kernel_tests/partitioned_variables_test.py +++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py @@ -39,7 +39,7 @@ from tensorflow.python.training import saver as saver_lib class PartitionerCreatorsTest(test.TestCase): def testFixedSizePartitioner(self): - with self.test_session(): + with self.cached_session(): partitioner = partitioned_variables.fixed_size_partitioner(5, axis=0) with variable_scope.variable_scope("root", partitioner=partitioner): v0 = variable_scope.get_variable( @@ -50,7 +50,7 @@ class PartitionerCreatorsTest(test.TestCase): self.assertAllEqual(v0_part, (5, 1)) def testFixedSizePartitionerInt64(self): - with self.test_session(): + with self.cached_session(): partitioner = partitioned_variables.fixed_size_partitioner(4, axis=0) with variable_scope.variable_scope("root", partitioner=partitioner): v0 = variable_scope.get_variable("v0", dtype=dtypes.int64, shape=[20]) @@ -58,7 +58,7 @@ class PartitionerCreatorsTest(test.TestCase): self.assertEqual(len(v0_list), 4) def testResourceFixedSizePartitioner(self): - with self.test_session(): + with self.cached_session(): partitioner = 
partitioned_variables.fixed_size_partitioner(5, axis=0) with variable_scope.variable_scope( "root", partitioner=partitioner, use_resource=True): @@ -88,7 +88,7 @@ class PartitionerCreatorsTest(test.TestCase): self.assertAllEqual(v0_part, expected_partitions) def testVariableAxisSizePartitioner(self): - with self.test_session(): + with self.cached_session(): # Create a partitioned variable of shape (4, 8, 16, 32) type float32 # Bytes per slice along the given axes: @@ -210,7 +210,7 @@ class PartitionerCreatorsTest(test.TestCase): self.assertAllEqual(v0_part, expected_partitions) def testMinMaxVariablePartitioner(self): - with self.test_session(): + with self.cached_session(): # Partitioning a variable of shape=[2048] with a minimum of 2K per slice. self._testMinMaxVariablePartitioner( max_partitions=100, @@ -323,7 +323,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertEquals(expected_specs[i], slices[i]._save_slice_info.spec) def testVecConstantInit(self): - with self.test_session(): + with self.cached_session(): rnd_par = constant_op.constant([1, 2, 3, 4]) vs = partitioned_variables.create_partitioned_variables([4], [4], rnd_par) variables.global_variables_initializer().run() @@ -334,7 +334,7 @@ class PartitionedVariablesTestCase(test.TestCase): self._TestSaveSpec(vs, ["4 0,1", "4 1,1", "4 2,1", "4 3,1"]) def testConstantInit(self): - with self.test_session(): + with self.cached_session(): rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]]) vs = partitioned_variables.create_partitioned_variables([2, 4], [1, 2], rnd_par) @@ -346,7 +346,7 @@ class PartitionedVariablesTestCase(test.TestCase): self._TestSaveSpec(vs, ["2 4 0,2:0,2", "2 4 0,2:2,2"]) def _testNameHelper(self, use_resource=False): - with self.test_session(): + with self.cached_session(): rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]]) with variable_scope.variable_scope("hi", use_resource=use_resource): vs1 = partitioned_variables.create_partitioned_variables([2, 4], 
[1, 2], @@ -363,7 +363,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertEqual(var2_name + "/part_0:0", vs2[0].name) self.assertEqual(var2_name + "/part_1:0", vs2[1].name) # Test same variable. - with self.test_session(): + with self.cached_session(): rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]]) with variable_scope.variable_scope( "hola", use_resource=use_resource) as vs: @@ -383,7 +383,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertEqual(var2_name + "/part_0:0", vs2[0].name) self.assertEqual(var2_name + "/part_1:0", vs2[1].name) # Test name_scope - with self.test_session(): + with self.cached_session(): rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]]) with ops.name_scope("ola"): vs1 = partitioned_variables.create_partitioned_variables([2, 4], [1, 2], @@ -408,7 +408,7 @@ class PartitionedVariablesTestCase(test.TestCase): self._testNameHelper(use_resource=True) def testRandomInitValue(self): - with self.test_session(): + with self.cached_session(): rnd = variables.Variable(random_ops.random_uniform([200, 40])) vs = partitioned_variables.create_partitioned_variables( rnd.get_shape(), [1, 10], rnd.initialized_value()) @@ -425,7 +425,7 @@ class PartitionedVariablesTestCase(test.TestCase): ]) def testRandomInitUnevenPartitions(self): - with self.test_session(): + with self.cached_session(): rnd = variables.Variable( random_ops.random_uniform([20, 43], dtype=dtypes.float64)) var_lists = [ @@ -463,7 +463,7 @@ class PartitionedVariablesTestCase(test.TestCase): self._TestSaveSpec(vs, save_specs[i]) def testDegenerate(self): - with self.test_session(): + with self.cached_session(): rnd = variables.Variable(random_ops.random_uniform([10, 43])) vs = partitioned_variables.create_partitioned_variables( rnd.get_shape(), [1, 1], rnd.initialized_value()) @@ -474,7 +474,7 @@ class PartitionedVariablesTestCase(test.TestCase): self._TestSaveSpec(vs, ["10 43 0,10:0,43"]) def testSliceSizeOne(self): - with 
self.test_session(): + with self.cached_session(): rnd = variables.Variable(random_ops.random_uniform([10, 43])) vs = partitioned_variables.create_partitioned_variables( rnd.get_shape(), [10, 1], rnd.initialized_value()) @@ -492,7 +492,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertAllClose([0., 1., 2., 3.], _IotaInitializer([4])) self.assertAllClose([[0., 1.], [0., 10.], [0., 100.], [0., 1000.]], _IotaInitializer([4, 2])) - with self.test_session(): + with self.cached_session(): vs = partitioned_variables.create_partitioned_variables([13, 5], [3, 1], _IotaInitializer) variables.global_variables_initializer().run() @@ -506,7 +506,7 @@ class PartitionedVariablesTestCase(test.TestCase): def testRandomInitializer(self): # Sanity check that the slices uses a different seed when using a random # initializer function. - with self.test_session(): + with self.cached_session(): var0, var1 = partitioned_variables.create_partitioned_variables( [20, 12], [1, 2], init_ops.random_uniform_initializer()) variables.global_variables_initializer().run() @@ -514,7 +514,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertTrue(np.linalg.norm(val0 - val1) > 1e-6) # Negative test that proves that slices have the same values if # the random initializer uses a seed. 
- with self.test_session(): + with self.cached_session(): var0, var1 = partitioned_variables.create_partitioned_variables( [20, 12], [1, 2], init_ops.random_uniform_initializer(seed=201)) variables.global_variables_initializer().run() @@ -522,7 +522,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertAllClose(val0, val1) def testSomeErrors(self): - with self.test_session(): + with self.cached_session(): rnd = variables.Variable(random_ops.random_uniform([10, 43])) with self.assertRaises(ValueError): partitioned_variables.create_partitioned_variables( @@ -547,7 +547,7 @@ class PartitionedVariablesTestCase(test.TestCase): [10, 43], [1, 50], rnd.initialized_value()) def testControlDepsNone(self): - with self.test_session() as session: + with self.cached_session() as session: c = constant_op.constant(1.0) with ops.control_dependencies([c]): # d get the control dependency. @@ -573,7 +573,7 @@ class PartitionedVariablesTestCase(test.TestCase): self.assertEqual([], op.control_inputs) def testConcat(self): - with self.test_session() as session: + with self.cached_session() as session: var_x = variable_scope.get_variable( "x", initializer=constant_op.constant([1., 2.]), diff --git a/tensorflow/python/kernel_tests/priority_queue_test.py b/tensorflow/python/kernel_tests/priority_queue_test.py index 3fb9c9c468..73a9c81638 100644 --- a/tensorflow/python/kernel_tests/priority_queue_test.py +++ b/tensorflow/python/kernel_tests/priority_queue_test.py @@ -36,7 +36,7 @@ from tensorflow.python.platform import test class PriorityQueueTest(test.TestCase): def testRoundTripInsertReadOnceSorts(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), ( (), ())) elem = np.random.randint(-5, 5, size=100).astype(np.int64) @@ -67,7 +67,7 @@ class PriorityQueueTest(test.TestCase): self.assertEqual(missed, set()) def testRoundTripInsertMultiThreadedReadOnceSorts(self): - with 
self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), ( (), ())) elem = np.random.randint(-5, 5, size=100).astype(np.int64) @@ -113,7 +113,7 @@ class PriorityQueueTest(test.TestCase): self.assertEqual(missed, set()) def testRoundTripFillsCapacityMultiThreadedEnqueueAndDequeue(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PriorityQueue(10, (dtypes.int64), (())) num_threads = 40 @@ -163,7 +163,7 @@ class PriorityQueueTest(test.TestCase): self.assertAllEqual(sorted(dequeued), sorted(all_enqueued_values)) def testRoundTripInsertManyMultiThreadedReadManyMultithreadedSorts(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (())) num_threads = 40 @@ -219,7 +219,7 @@ class PriorityQueueTest(test.TestCase): self.assertAllEqual(set(dequeued), set(all_enqueued_values)) def testRoundTripInsertManyMultiThreadedReadOnceSorts(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), ( (), ())) elem = np.random.randint(-5, 5, size=100).astype(np.int64) @@ -268,7 +268,7 @@ class PriorityQueueTest(test.TestCase): self.assertEqual(missed, set()) def testRoundTripInsertOnceReadOnceSorts(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), ( (), ())) elem = np.random.randint(-100, 100, size=1000).astype(np.int64) @@ -289,7 +289,7 @@ class PriorityQueueTest(test.TestCase): self.assertTrue((dv0, dv1) in allowed[e]) def testRoundTripInsertOnceReadManySorts(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (())) elem = np.random.randint(-100, 100, size=1000).astype(np.int64) q.enqueue_many((elem, elem)).run() @@ -297,7 
+297,7 @@ class PriorityQueueTest(test.TestCase): self.assertAllEqual(deq_values, sorted(elem)) def testRoundTripInsertOnceReadOnceLotsSorts(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (())) elem = np.random.randint(-100, 100, size=1000).astype(np.int64) q.enqueue_many((elem, elem)).run() @@ -306,13 +306,13 @@ class PriorityQueueTest(test.TestCase): self.assertAllEqual(deq_values, sorted(elem)) def testInsertingNonInt64Fails(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.PriorityQueue(2000, (dtypes.string), (())) with self.assertRaises(TypeError): q.enqueue_many((["a", "b", "c"], ["a", "b", "c"])).run() def testInsertingNonScalarFails(self): - with self.test_session() as sess: + with self.cached_session() as sess: input_priority = array_ops.placeholder(dtypes.int64) input_other = array_ops.placeholder(dtypes.string) q = data_flow_ops.PriorityQueue(2000, (dtypes.string,), (())) diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 8e06e1abfb..8c84b2a49f 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -146,7 +146,7 @@ class IdentityReaderTest(test.TestCase): self.assertAllEqual(expected, v) def testOneEpoch(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.IdentityReader("test_reader") work_completed = reader.num_work_units_completed() produced = reader.num_records_produced() @@ -180,7 +180,7 @@ class IdentityReaderTest(test.TestCase): self.assertAllEqual(0, queued_length.eval()) def testMultipleEpochs(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.IdentityReader("test_reader") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) enqueue = queue.enqueue_many([["DD", "EE"]]) @@ -201,7 +201,7 @@ class 
IdentityReaderTest(test.TestCase): sess.run([key, value]) def testSerializeRestore(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.IdentityReader("test_reader") produced = reader.num_records_produced() queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) @@ -256,7 +256,7 @@ class IdentityReaderTest(test.TestCase): reader.restore_state(b"BOGUS" + state[5:]).run() def testReset(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.IdentityReader("test_reader") work_completed = reader.num_work_units_completed() produced = reader.num_records_produced() @@ -307,7 +307,7 @@ class WholeFileReaderTest(test.TestCase): self.assertAllEqual(self._content[index], v) def testOneEpoch(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.WholeFileReader("test_reader") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) queue.enqueue_many([self._filenames]).run() @@ -323,7 +323,7 @@ class WholeFileReaderTest(test.TestCase): sess.run([key, value]) def testInfiniteEpochs(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.WholeFileReader("test_reader") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) enqueue = queue.enqueue_many([self._filenames]) @@ -366,7 +366,7 @@ class TextLineReaderTest(test.TestCase): return filenames def _testOneEpoch(self, files): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.TextLineReader(name="test_reader") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -391,7 +391,7 @@ class TextLineReaderTest(test.TestCase): def testSkipHeaderLines(self): files = self._CreateFiles() - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.TextLineReader(skip_header_lines=1, name="test_reader") queue = data_flow_ops.FIFOQueue(99, 
[dtypes.string], shapes=()) key, value = reader.read(queue) @@ -522,7 +522,7 @@ class FixedLengthRecordReaderTest(TFCompressionTestCase): # gap_bytes=hop_bytes-record_bytes def _TestOneEpoch(self, files, num_records, gap_bytes, encoding=None): hop_bytes = 0 if gap_bytes == 0 else self._record_bytes + gap_bytes - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.FixedLengthRecordReader( header_bytes=self._header_bytes, record_bytes=self._record_bytes, @@ -549,7 +549,7 @@ class FixedLengthRecordReaderTest(TFCompressionTestCase): files, num_overlapped_records, encoding=None): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.FixedLengthRecordReader( header_bytes=self._header_bytes, record_bytes=self._record_bytes, @@ -621,7 +621,7 @@ class TFRecordReaderTest(TFCompressionTestCase): def testOneEpoch(self): files = self._CreateFiles() - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.TFRecordReader(name="test_reader") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -640,7 +640,7 @@ class TFRecordReaderTest(TFCompressionTestCase): def testReadUpTo(self): files = self._CreateFiles() - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.TFRecordReader(name="test_reader") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) batch_size = 3 @@ -670,7 +670,7 @@ class TFRecordReaderTest(TFCompressionTestCase): options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB) files = self._CreateFiles(options) - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.TFRecordReader(name="test_reader", options=options) queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -687,7 +687,7 @@ class TFRecordReaderTest(TFCompressionTestCase): options = 
tf_record.TFRecordOptions(TFRecordCompressionType.GZIP) files = self._CreateFiles(options) - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.TFRecordReader(name="test_reader", options=options) queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -752,7 +752,7 @@ class LMDBReaderTest(test.TestCase): shutil.copy(path, self.db_path) def testReadFromFile(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_file") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -770,7 +770,7 @@ class LMDBReaderTest(test.TestCase): k, v = sess.run([key, value]) def testReadFromSameFile(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") filename_queue = input_lib.string_input_producer( @@ -789,7 +789,7 @@ class LMDBReaderTest(test.TestCase): coord.join(threads) def testReadFromFolder(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_folder") queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=()) key, value = reader.read(queue) @@ -807,7 +807,7 @@ class LMDBReaderTest(test.TestCase): k, v = sess.run([key, value]) def testReadFromFileRepeatedly(self): - with self.test_session() as sess: + with self.cached_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_file_repeated") filename_queue = input_lib.string_input_producer( [self.db_path], num_epochs=None) diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py index 068860d5d4..ebb9872f22 100644 --- a/tensorflow/python/kernel_tests/record_input_test.py +++ b/tensorflow/python/kernel_tests/record_input_test.py @@ -44,7 
+44,7 @@ class RecordInputOpTest(test.TestCase): w.close() def testRecordInputSimple(self): - with self.test_session() as sess: + with self.cached_session() as sess: self.generateTestData("basic", 1, 1) yield_op = data_flow_ops.RecordInput( @@ -57,7 +57,7 @@ class RecordInputOpTest(test.TestCase): self.assertEqual(sess.run(yield_op), b"0000000000") def testRecordInputSimpleGzip(self): - with self.test_session() as sess: + with self.cached_session() as sess: self.generateTestData( "basic", 1, @@ -76,7 +76,7 @@ class RecordInputOpTest(test.TestCase): self.assertEqual(sess.run(yield_op), b"0000000000") def testRecordInputSimpleZlib(self): - with self.test_session() as sess: + with self.cached_session() as sess: self.generateTestData( "basic", 1, @@ -98,7 +98,7 @@ class RecordInputOpTest(test.TestCase): files = 100 records_per_file = 100 batches = 2 - with self.test_session() as sess: + with self.cached_session() as sess: self.generateTestData("basic", files, records_per_file) records = data_flow_ops.RecordInput( @@ -126,7 +126,7 @@ class RecordInputOpTest(test.TestCase): def testDoesNotDeadlock(self): # Iterate multiple times to cause deadlock if there is a chance it can occur for _ in range(30): - with self.test_session() as sess: + with self.cached_session() as sess: self.generateTestData("basic", 1, 1) records = data_flow_ops.RecordInput( @@ -141,7 +141,7 @@ class RecordInputOpTest(test.TestCase): sess.run(yield_op) def testEmptyGlob(self): - with self.test_session() as sess: + with self.cached_session() as sess: record_input = data_flow_ops.RecordInput(file_pattern="foo") yield_op = record_input.get_yield_op() sess.run(variables.global_variables_initializer()) @@ -152,7 +152,7 @@ class RecordInputOpTest(test.TestCase): files = 10 records_per_file = 10 batches = 2 - with self.test_session() as sess: + with self.cached_session() as sess: self.generateTestData("basic", files, records_per_file) records = data_flow_ops.RecordInput( diff --git 
a/tensorflow/python/kernel_tests/reduce_join_op_test.py b/tensorflow/python/kernel_tests/reduce_join_op_test.py index 663561ced7..3bb4986313 100644 --- a/tensorflow/python/kernel_tests/reduce_join_op_test.py +++ b/tensorflow/python/kernel_tests/reduce_join_op_test.py @@ -113,7 +113,7 @@ class ReduceJoinTest(UnicodeTestCase): keep_dims: Whether or not to retain reduced dimensions. separator: The separator to use for joining. """ - with self.test_session(): + with self.cached_session(): output = string_ops.reduce_join( inputs=input_array, axis=axis, @@ -136,7 +136,7 @@ class ReduceJoinTest(UnicodeTestCase): axis: The indices to reduce. separator: The separator to use when joining. """ - with self.test_session(): + with self.cached_session(): output = string_ops.reduce_join( inputs=input_array, axis=axis, keep_dims=False, separator=separator) output_keep_dims = string_ops.reduce_join( @@ -234,7 +234,7 @@ class ReduceJoinTest(UnicodeTestCase): input_array = [["a"], ["b"]] truth = ["ab"] truth_shape = None - with self.test_session(): + with self.cached_session(): placeholder = array_ops.placeholder(dtypes.string, name="placeholder") reduced = string_ops.reduce_join(placeholder, axis=0) output_array = reduced.eval(feed_dict={placeholder.name: input_array}) @@ -247,7 +247,7 @@ class ReduceJoinTest(UnicodeTestCase): truth_dim_zero = ["thisplease", "isdo", "anot", "testpanic"] truth_dim_one = ["thisisatest", "pleasedonotpanic"] truth_shape = None - with self.test_session(): + with self.cached_session(): placeholder = array_ops.placeholder(dtypes.int32, name="placeholder") reduced = string_ops.reduce_join(input_array, axis=placeholder) output_array_dim_zero = reduced.eval(feed_dict={placeholder.name: [0]}) @@ -298,7 +298,7 @@ class ReduceJoinTest(UnicodeTestCase): self._testMultipleReduceJoin(input_array, axis=permutation) def testInvalidReductionIndices(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, "Invalid 
reduction dim"): string_ops.reduce_join(inputs="", axis=0) with self.assertRaisesRegexp(ValueError, @@ -313,7 +313,7 @@ class ReduceJoinTest(UnicodeTestCase): string_ops.reduce_join(inputs=[[""]], axis=[0, 2]) def testZeroDims(self): - with self.test_session(): + with self.cached_session(): inputs = np.zeros([0, 1], dtype=str) # Reduction that drops the dim of size 0. @@ -326,7 +326,7 @@ class ReduceJoinTest(UnicodeTestCase): self.assertAllEqual([0], output_shape) def testInvalidArgsUnknownShape(self): - with self.test_session(): + with self.cached_session(): placeholder = array_ops.placeholder(dtypes.string, name="placeholder") index_too_high = string_ops.reduce_join(placeholder, axis=1) duplicate_index = string_ops.reduce_join(placeholder, axis=[-1, 1]) @@ -336,7 +336,7 @@ class ReduceJoinTest(UnicodeTestCase): duplicate_index.eval(feed_dict={placeholder.name: [[""]]}) def testInvalidArgsUnknownIndices(self): - with self.test_session(): + with self.cached_session(): placeholder = array_ops.placeholder(dtypes.int32, name="placeholder") reduced = string_ops.reduce_join(["test", "test2"], axis=placeholder) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index ea78b58d88..496a452a03 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -61,7 +61,7 @@ class ReducedShapeTest(test.TestCase): self.assertAllEqual(output.eval(), result) def testSimple(self): - with self.test_session(): + with self.cached_session(): self._check([3], [], [3]) self._check([3], [0], [1]) self._check([5, 3], [], [5, 3]) @@ -71,7 +71,7 @@ class ReducedShapeTest(test.TestCase): def testZeros(self): """Check that reduced_shape does the right thing with zero dimensions.""" - with self.test_session(): + with self.cached_session(): self._check([0], [], [0]) self._check([0], [0], [1]) self._check([0, 3], [], [0, 3]) @@ -84,7 +84,7 @@ class 
ReducedShapeTest(test.TestCase): self._check([3, 0], [0, 1], [1, 1]) def testNegAxes(self): - with self.test_session(): + with self.cached_session(): self._check([10, 10, 10], [-1], [10, 10, 1]) self._check([10, 10, 10], [-1, 2], [10, 10, 1]) self._check([10, 10, 10], [-1, -1], [10, 10, 1]) @@ -95,7 +95,7 @@ class ReducedShapeTest(test.TestCase): class ReductionUnknownShape(test.TestCase): def testBasic(self): - with self.test_session(): + with self.cached_session(): for dtype, reductions in [(dtypes.float32, (math_ops.reduce_sum, math_ops.reduce_mean, math_ops.reduce_prod, math_ops.reduce_max, @@ -617,7 +617,7 @@ class MinReductionTest(test.TestCase): def testGradient(self): s = [2, 3, 4, 2] x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_min(t, [1, 2]) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -627,7 +627,7 @@ class MinReductionTest(test.TestCase): def testGradient2(self): s = [2, 3, 4, 2] x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_min(t, [1]) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -637,7 +637,7 @@ class MinReductionTest(test.TestCase): def testGradient3(self): s = [2, 3, 4, 2] x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_min(t, [2]) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -647,7 +647,7 @@ class MinReductionTest(test.TestCase): def testGradient4(self): s = [2, 3, 4, 2] x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_min(t) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -655,7 +655,7 @@ class MinReductionTest(test.TestCase): 
self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8) def testEmptyGradients(self): - with self.test_session(): + with self.cached_session(): x = array_ops.zeros([0, 3]) y = math_ops.reduce_min(x, [1]) error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0]) @@ -744,7 +744,7 @@ class MaxReductionTest(test.TestCase): def testGradient(self): s = [2, 3, 4, 2] x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [1, 2]) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -754,7 +754,7 @@ class MaxReductionTest(test.TestCase): def testGradient2(self): s = [2, 3, 4, 2] x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [1]) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -764,7 +764,7 @@ class MaxReductionTest(test.TestCase): def testGradient3(self): s = [2, 3, 4, 2] x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [2]) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -774,7 +774,7 @@ class MaxReductionTest(test.TestCase): def testGradient4(self): s = [2, 3, 4, 2] x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) - with self.test_session(): + with self.cached_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -782,7 +782,7 @@ class MaxReductionTest(test.TestCase): self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8) def testEmptyGradients(self): - with self.test_session(): + with self.cached_session(): x = array_ops.zeros([0, 3]) y = math_ops.reduce_max(x, [1]) error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0]) @@ -960,7 +960,7 @@ class 
CountNonzeroReductionTest(test.TestCase): def testStringReduce(self): # Test case for GitHub issue 18712 - with self.test_session() as sess: + with self.cached_session() as sess: v = math_ops.count_nonzero(constant_op.constant(["test"])) self.assertAllClose(sess.run(v), 1) diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py index 7bd8c3ca27..e81f562a2a 100644 --- a/tensorflow/python/kernel_tests/regex_full_match_op_test.py +++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py @@ -35,7 +35,7 @@ class RegexFullMatchOpVariantsTest(test.TestCase, parameterized.TestCase): def testRegexFullMatch(self, op): values = ["abaaba", "abcdabcde"] - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant(values, dtypes.string) matched = op(input_tensor, "a.*a").eval() self.assertAllEqual([True, False], matched) @@ -49,14 +49,14 @@ class RegexFullMatchOpVariantsTest(test.TestCase, parameterized.TestCase): def testEmptyMatch(self, op): values = ["abc", "1"] - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant(values, dtypes.string) matched = op(input_tensor, "").eval() self.assertAllEqual([False, False], matched) def testInvalidPattern(self, op): values = ["abc", "1"] - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant(values, dtypes.string) invalid_pattern = "A[" matched = op(input_tensor, invalid_pattern) diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 657d92fa23..a45a325b47 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -104,7 +104,7 @@ class ReluTest(test.TestCase): # The gradient test for ReLU is a bit tricky as the derivative is not well # defined at around zero and we want to avoid that in terms of input values. 
def testGradientFloat32(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -149,7 +149,7 @@ class ReluTest(test.TestCase): self.assertAllClose(dx_f32_v, dx_f16_v, atol=3e-4) def testGradientFloat64(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -166,7 +166,7 @@ class ReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradGradFloat32(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -183,7 +183,7 @@ class ReluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -201,7 +201,7 @@ class ReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradientScalar(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = variables.Variable(100.) y = nn_ops.relu(x) loss = y**2 @@ -249,7 +249,7 @@ class Relu6Test(test.TestCase): # not well defined at around zero and six and we want to avoid that # in terms of input values. 
def testGradientFloat32(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 6.1, 6.3, 6.5, 6.7, 6.9], shape=[2, 5], @@ -265,7 +265,7 @@ class Relu6Test(test.TestCase): self.assertLess(err, 1e-4) def testGradientFloat64(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 6.1, 6.3, 6.5, 6.7, 6.9], shape=[2, 5], @@ -313,7 +313,7 @@ class EluTest(test.TestCase): use_gpu=True) def testGradientFloat32(self): - with self.test_session(): + with self.cached_session(): x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]] x = constant_op.constant(x_val, name="x") y = nn_ops.elu(x, name="elu") @@ -324,7 +324,7 @@ class EluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradientFloat64(self): - with self.test_session(): + with self.cached_session(): x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]] x = constant_op.constant(x_val, dtype=dtypes.float64, name="x") y = nn_ops.elu(x, name="elu") @@ -335,7 +335,7 @@ class EluTest(test.TestCase): self.assertLess(err, 1e-6) def testGradGrad(self): - with self.test_session(): + with self.cached_session(): x = array_ops.placeholder(dtype=dtypes.float32) elu = nn_ops.elu(x) g, = gradients_impl.gradients(elu, x) @@ -346,7 +346,7 @@ class EluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGradFloat32(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -363,7 +363,7 @@ class EluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -415,7 +415,7 @@ class SeluTest(test.TestCase): use_gpu=True) def testGradientFloat32(self): - with self.test_session(): + with 
self.cached_session(): x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]] x = constant_op.constant(x_val, name="x") y = nn_ops.selu(x, name="selu") @@ -426,7 +426,7 @@ class SeluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradientFloat64(self): - with self.test_session(): + with self.cached_session(): x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]] x = constant_op.constant(x_val, dtype=dtypes.float64, name="x") y = nn_ops.selu(x, name="selu") @@ -437,7 +437,7 @@ class SeluTest(test.TestCase): self.assertLess(err, 1e-6) def testGradGradFloat32(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -454,7 +454,7 @@ class SeluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -503,7 +503,7 @@ class CreluTest(test.TestCase): use_gpu=True) def testNumbersWithAxis0(self): - with self.test_session(): + with self.cached_session(): crelu = nn_ops.crelu( np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]), axis=0) tf_relu = crelu.eval() @@ -512,7 +512,7 @@ class CreluTest(test.TestCase): self.assertAllEqual(np_crelu, tf_relu) def testNumbersWithAxis1(self): - with self.test_session(): + with self.cached_session(): crelu = nn_ops.crelu( np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]), axis=1) tf_relu = crelu.eval() diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py index ef9b439230..ca3ff1d1df 100644 --- a/tensorflow/python/kernel_tests/reshape_op_test.py +++ b/tensorflow/python/kernel_tests/reshape_op_test.py @@ -94,7 +94,7 @@ class ReshapeTest(test.TestCase): def testFloatReshapeGradThreeDimensions(self): x = np.arange(1., 25.).reshape([2, 3, 4]).astype(np.float32) s = 
list(np.shape(x)) - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant(x) reshape_out = array_ops.reshape(input_tensor, [1, 8, 3]) err = gradient_checker.compute_gradient_error( diff --git a/tensorflow/python/kernel_tests/reverse_sequence_op_test.py b/tensorflow/python/kernel_tests/reverse_sequence_op_test.py index 9beb615b2c..8fc71e0c57 100644 --- a/tensorflow/python/kernel_tests/reverse_sequence_op_test.py +++ b/tensorflow/python/kernel_tests/reverse_sequence_op_test.py @@ -120,7 +120,7 @@ class ReverseSequenceTest(test.TestCase): batch_axis = 2 seq_lengths = np.asarray([3, 0, 4], dtype=np.int64) - with self.test_session(): + with self.cached_session(): input_t = constant_op.constant(x, shape=x.shape) seq_lengths_t = constant_op.constant(seq_lengths, shape=seq_lengths.shape) reverse_sequence_out = array_ops.reverse_sequence( @@ -171,7 +171,7 @@ class ReverseSequenceTest(test.TestCase): seq_axis=0, batch_axis=3) - with self.test_session(): + with self.cached_session(): inputs = array_ops.placeholder(dtypes.float32, shape=(32, 2, 3)) seq_lengths = array_ops.placeholder(dtypes.int64, shape=(32,)) output = array_ops.reverse_sequence( diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index f2f3023469..86e063cb36 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -294,7 +294,7 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllEqual(scatter_update.get_shape().as_list(), shape) expected_result = np.zeros([2, 2], dtype=np.int32) - with self.test_session(): + with self.cached_session(): ref.initializer.run() self.assertAllEqual(expected_result, scatter_update.eval()) @@ -409,7 +409,7 @@ class ScatterNdTest(test.TestCase): expected = np.array([b"", b"one", b"", b"three", b"four", b"", b"", b"seven"]) scatter = self.scatter_nd(indices, updates, shape=(8,)) - with 
self.test_session() as sess: + with self.cached_session() as sess: result = sess.run(scatter) self.assertAllEqual(expected, result) @@ -420,7 +420,7 @@ class ScatterNdTest(test.TestCase): dtype=dtypes.string) expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"]) scatter = self.scatter_nd(indices, updates, shape=(8,)) - with self.test_session() as sess: + with self.cached_session() as sess: result = sess.run(scatter) self.assertAllEqual(expected, result) @@ -432,7 +432,7 @@ class ScatterNdTest(test.TestCase): expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]), np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])] scatter = self.scatter_nd(indices, updates, shape=(8,)) - with self.test_session() as sess: + with self.cached_session() as sess: result = sess.run(scatter) self.assertTrue(np.array_equal(result, expected[0]) or np.array_equal(result, expected[1])) @@ -451,7 +451,7 @@ class ScatterNdTest(test.TestCase): scatter = self.scatter_nd(indices, updates, shape) self.assertAllEqual(scatter.get_shape().as_list(), shape) expected_result = np.zeros([2, 2], dtype=np.int32) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_result, scatter.eval()) def testUndefinedIndicesShape(self): @@ -486,7 +486,7 @@ class ScatterNdTest(test.TestCase): updates = array_ops.placeholder(dtypes.int32, shape=None) shape = constant_op.constant([0, 3, 2], dtypes.int32) - with self.test_session(): + with self.cached_session(): with self.assertRaisesOpError( "Indices and updates specified for empty output"): self.scatter_nd(indices, updates, shape).eval(feed_dict={ @@ -500,7 +500,7 @@ class ScatterNdTest(test.TestCase): shape = constant_op.constant([0], dtypes.int32) scatter = self.scatter_nd(indices, updates, shape) - with self.test_session(): + with self.cached_session(): self.assertEqual(scatter.eval().size, 0) def testRank3InvalidShape1(self): @@ -531,7 +531,7 @@ class ScatterNdTest(test.TestCase): [outputs], [updates, 
input_], [grad_vals]) expected_updates_grad = np.array([1, 4], dtype=np.float64) expected_input_grad = np.array([[1, 2], [3, 4]], dtype=np.float64) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_updates_grad, updates_grad.eval()) if self.non_aliasing_add_test: self.assertAllEqual(expected_input_grad, input_grad.eval()) @@ -548,7 +548,7 @@ class ScatterNdTest(test.TestCase): [outputs], [updates, input_], [grad_vals]) expected_updates_grad = np.array([[1, 2], [3, 4]], dtype=np.float64) expected_input_grad = np.array([[3, 4], [1, 2]], dtype=np.float64) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_updates_grad, updates_grad.eval()) if self.non_aliasing_add_test: self.assertAllEqual(expected_input_grad, input_grad.eval()) @@ -570,7 +570,7 @@ class ScatterNdTest(test.TestCase): [[[3, 4], [5, 6]], [[1, 2], [7, 8]]], dtype=np.float64) expected_input_grad = np.array( [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.float64) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_updates_grad, updates_grad.eval()) if self.non_aliasing_add_test: self.assertAllEqual(expected_input_grad, input_grad.eval()) @@ -607,7 +607,7 @@ class ScatterNdTest(test.TestCase): [[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]] ]]], dtype=np.float64) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected_updates_grad, updates_grad.eval()) if self.non_aliasing_add_test: self.assertAllEqual(expected_input_grad, input_grad.eval()) @@ -616,33 +616,33 @@ class ScatterNdTest(test.TestCase): indices = array_ops.zeros([100000, 1], dtypes.int32) values = np.random.randn(100000) shape = [1] - with self.test_session(): + with self.cached_session(): val = self.scatter_nd(indices, values, shape).eval() self.assertAllClose([np.sum(values)], val) def testSmokeScatterNdBatch2DSliceDim2(self): - with self.test_session(): + with self.cached_session(): indices = 
array_ops.zeros([3, 5, 2], dtype=dtypes.int32) values = array_ops.zeros([3, 5, 7]) shape = [4, 6, 7] self.scatter_nd(indices, values, shape).eval() def testSmokeScatterNdBatch1DSliceDim2(self): - with self.test_session(): + with self.cached_session(): indices = array_ops.zeros([0, 2], dtype=dtypes.int32) values = array_ops.zeros([0, 7]) shape = [4, 6, 7] self.scatter_nd(indices, values, shape).eval() def testSmokeScatterNdBatch1DSliceDim3ShapeRank7(self): - with self.test_session(): + with self.cached_session(): indices = array_ops.zeros([1, 3], dtype=dtypes.int32) values = array_ops.zeros([1, 6, 7, 8, 9]) shape = [3, 4, 5, 6, 7, 8, 9] self.scatter_nd(indices, values, shape).eval() def testSmokeScatterNdBatch2DSliceDim3ShapeRank7(self): - with self.test_session(): + with self.cached_session(): indices = array_ops.zeros([1, 2, 3], dtype=dtypes.int32) values = array_ops.zeros([1, 2, 6, 7, 8, 9]) shape = [3, 4, 5, 6, 7, 8, 9] diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index a82855dfeb..ce507e4ad7 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -177,7 +177,7 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsInvalid1(self): shape = [4, 4] - with self.test_session(): + with self.cached_session(): tf_x, _ = self._input(shape) indices = [-1, -1, 0, 0] s = math_ops.segment_sum(data=tf_x, segment_ids=indices) @@ -188,7 +188,7 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsInvalid2(self): shape = [4, 4] - with self.test_session(): + with self.cached_session(): tf_x, _ = self._input(shape) indices = [0, 1, 0, 1] s = math_ops.segment_sum(data=tf_x, segment_ids=indices) @@ -197,7 +197,7 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsInvalid3(self): shape = [4, 4] - with self.test_session(): + with 
self.cached_session(): tf_x, _ = self._input(shape) indices = [0, 1, 2, 0] s = math_ops.segment_sum(data=tf_x, segment_ids=indices) @@ -233,7 +233,7 @@ class SegmentReductionOpTest(SegmentReductionHelper): math_ops.segment_sum, math_ops.segment_mean, math_ops.segment_min, math_ops.segment_max ]: - with self.test_session(): + with self.cached_session(): tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64) s = tf_op(data=tf_x, segment_ids=indices) jacob_t, jacob_n = gradient_checker.compute_gradient( @@ -736,7 +736,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): segment_indices = [0, 1, 2, 2] num_indices = len(segment_indices) for tf_op in [math_ops.sparse_segment_sum, math_ops.sparse_segment_mean]: - with self.test_session(): + with self.cached_session(): tf_indices, _, tf_x, np_x = self._sparse_input( shape, num_indices, dtype=dtypes_lib.float64) s = tf_op(data=tf_x, indices=tf_indices, segment_ids=segment_indices) @@ -758,7 +758,7 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): math_ops.sparse_segment_sum_with_num_segments, math_ops.sparse_segment_mean_with_num_segments, ]: - with self.test_session(): + with self.cached_session(): tf_indices, _, tf_x, np_x = self._sparse_input( shape, num_indices, dtype=dtypes_lib.float64) s = tf_op( diff --git a/tensorflow/python/kernel_tests/session_ops_test.py b/tensorflow/python/kernel_tests/session_ops_test.py index 678016b13d..03e1ae852f 100644 --- a/tensorflow/python/kernel_tests/session_ops_test.py +++ b/tensorflow/python/kernel_tests/session_ops_test.py @@ -31,7 +31,7 @@ from tensorflow.python.platform import test class SessionOpsTest(test.TestCase): def testHandleBasic(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle. 
a = constant_op.constant(10) b = constant_op.constant(5) @@ -45,7 +45,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(500, sess.run(y, feed_dict={f: h.handle})) def testHandleEval(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle. a = constant_op.constant(10) b = constant_op.constant(5) @@ -57,7 +57,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(50, h.eval()) def testHandleAndValue(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle and a value. a = constant_op.constant(10) b = constant_op.constant(5) @@ -70,7 +70,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(500, v) def testHandleCond(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle and a value a = constant_op.constant(10) b = constant_op.constant(5) @@ -90,7 +90,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(5000, result) def testHandleForLoop(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Initialize a handle. a = constant_op.constant(0) h = session_ops.get_session_handle(a) @@ -107,7 +107,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(100, h.eval()) def testHandleWhileLoop(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Initialize a handle. a = constant_op.constant(0) h = session_ops.get_session_handle(a) @@ -127,7 +127,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(101, h.eval()) def testHandleMover(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle. a = constant_op.constant(10) b = constant_op.constant(5) @@ -148,7 +148,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(100, sess.run(y, feed_dict={f: h.handle})) def testHandleDelete(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle. 
a = constant_op.constant(10) b = constant_op.constant(5) @@ -157,7 +157,7 @@ class SessionOpsTest(test.TestCase): sess.run(h).delete() def testHandleDeleteRaw(self): - with self.test_session() as sess: + with self.cached_session() as sess: # Return a handle. a = constant_op.constant(10) b = constant_op.constant(5) @@ -171,7 +171,7 @@ class SessionOpsTest(test.TestCase): sess.run(x, feed_dict={f: raw_h}) def testMultiDevices(self): - with self.test_session() as sess: + with self.cached_session() as sess: with ops.device(test.gpu_device_name()): a = constant_op.constant(1.0) a_handle = sess.run(session_ops.get_session_handle(a)) @@ -189,7 +189,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(3.0, c_handle.eval()) def testHandleGC(self): - with self.test_session() as sess: + with self.cached_session() as sess: # initial values live on CPU with ops.device("/cpu:0"): one = constant_op.constant(1, dtype=dtypes.float32) @@ -213,7 +213,7 @@ class SessionOpsTest(test.TestCase): add_h2: x_handle.handle}) def testHandlePlacement(self): - with self.test_session() as sess: + with self.cached_session() as sess: a = constant_op.constant(1.0) a_handle_op = session_ops.get_session_handle(a) b = constant_op.constant(2.0) @@ -233,7 +233,7 @@ class SessionOpsTest(test.TestCase): self.assertEqual(3.0, c_handle.eval()) def testFeedOneHandleDirectly(self): - with self.test_session() as sess: + with self.cached_session() as sess: a = constant_op.constant(10.0) b = constant_op.constant(5.0) c = math_ops.multiply(a, b) @@ -244,7 +244,7 @@ class SessionOpsTest(test.TestCase): self.assertAllClose(2500.0, sess.run(d, feed_dict={c: h_c})) def testDirectHandleFeedOverlappingWithFetches(self): - with self.test_session() as sess: + with self.cached_session() as sess: a = constant_op.constant(10.0) b = constant_op.constant(5.0) c = math_ops.multiply(a, b) @@ -270,7 +270,7 @@ class SessionOpsTest(test.TestCase): self.assertAllClose(50.0, d_val) def testFeedTwoHandlesDirectly(self): - with 
self.test_session() as sess: + with self.cached_session() as sess: a = constant_op.constant(10.0) b = constant_op.constant(5.0) c = math_ops.multiply(a, b) @@ -284,7 +284,7 @@ class SessionOpsTest(test.TestCase): self.assertAllClose(-48.0, sess.run(e, feed_dict={c: h_d, d: h_c})) def testFeedHandleToVariableDirectly(self): - with self.test_session() as sess: + with self.cached_session() as sess: a = variables.Variable(12.0) inc_a = state_ops.assign_add(a, 2.0) b = math_ops.add(a, 5.0) diff --git a/tensorflow/python/kernel_tests/sets_test.py b/tensorflow/python/kernel_tests/sets_test.py index 52b723802f..8335e9c139 100644 --- a/tensorflow/python/kernel_tests/sets_test.py +++ b/tensorflow/python/kernel_tests/sets_test.py @@ -158,7 +158,7 @@ class SetOpsTest(test_util.TensorFlowTestCase): for op in ops: self.assertEqual(None, op.get_shape().dims) self.assertEqual(dtypes.int32, op.dtype) - with self.test_session() as sess: + with self.cached_session() as sess: results = sess.run(ops) self.assertAllEqual(results[0], results[1]) return results[0] @@ -477,7 +477,7 @@ class SetOpsTest(test_util.TensorFlowTestCase): dynamic_values_shape_ops = [] static_indices_shape = None static_values_shape = None - with self.test_session() as sess: + with self.cached_session() as sess: for op in ops: if static_indices_shape is None: static_indices_shape = op.indices.get_shape() @@ -533,7 +533,7 @@ class SetOpsTest(test_util.TensorFlowTestCase): def _set_intersection_count(self, a, b): op = sets.set_size(sets.set_intersection(a, b)) - with self.test_session() as sess: + with self.cached_session() as sess: return sess.run(op) def test_set_difference_multirow_2d(self): @@ -971,7 +971,7 @@ class SetOpsTest(test_util.TensorFlowTestCase): def _set_difference_count(self, a, b, aminusb=True): op = sets.set_size(sets.set_difference(a, b, aminusb)) - with self.test_session() as sess: + with self.cached_session() as sess: return sess.run(op) def test_set_union_multirow_2d(self): @@ -1220,7 +1220,7 
@@ class SetOpsTest(test_util.TensorFlowTestCase): def _set_union_count(self, a, b): op = sets.set_size(sets.set_union(a, b)) - with self.test_session() as sess: + with self.cached_session() as sess: return sess.run(op) def _assert_set_operation(self, expected_indices, expected_values, diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index 34e34d9d1b..0304dc3875 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -158,7 +158,7 @@ class ShapeOpsTest(test.TestCase): # Disabled because it takes too long to run, but manually verified # as passing at time of writing. def _test64BitOutput(self): - with self.test_session(): + with self.cached_session(): inp = array_ops.zeros([2**31]) num_elements = array_ops.size_internal( inp, optimize=False, out_type=dtypes.int64) @@ -166,7 +166,7 @@ class ShapeOpsTest(test.TestCase): # Too large for tf.int32 output. with self.assertRaises(errors_impl.InvalidArgumentError): - with self.test_session(): + with self.cached_session(): inp = array_ops.zeros([2**31]) num_elements = array_ops.size_internal( inp, optimize=False, out_type=dtypes.int32) @@ -228,7 +228,7 @@ class ShapeOpsTest(test.TestCase): self._compareExpandDimsAll(choice([2, 3, 5]), -4) def testExpandDimsErrors(self): - with self.test_session(): + with self.cached_session(): self.assertRaises(ValueError, array_ops.expand_dims, np.zeros([2, 3, 5]), -5) self.assertRaises(ValueError, array_ops.expand_dims, @@ -239,7 +239,7 @@ class ShapeOpsTest(test.TestCase): [False, True, True], 4) def testExpandDimsGradient(self): - with self.test_session(): + with self.cached_session(): inp = constant_op.constant( np.random.rand(4, 2).astype("f"), dtype=dtypes.float32) squeezed = array_ops.expand_dims(inp, 1) @@ -249,7 +249,7 @@ class ShapeOpsTest(test.TestCase): self.assertLess(err, 1e-3) def testExpandDimsScalar(self): - with self.test_session(): + with 
self.cached_session(): inp = constant_op.constant(7) self.assertAllEqual([7], array_ops.expand_dims(inp, 0).eval()) self.assertAllEqual([7], array_ops.expand_dims(inp, -1).eval()) @@ -375,7 +375,7 @@ class ShapeOpsTest(test.TestCase): np.zeros([1, 2, 1]), [2, 3]) def testSqueezeGradient(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 2).astype("f") a = array_ops.reshape(inp, [4, 1, 2]) squeezed = array_ops.squeeze(a, []) @@ -385,7 +385,7 @@ class ShapeOpsTest(test.TestCase): self.assertLess(err, 1e-3) def testSqueezeGradientWithSqueezeDims(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 2).astype("f") a = array_ops.reshape(inp, [4, 1, 2, 1]) squeezed = array_ops.squeeze(a, [1]) @@ -395,7 +395,7 @@ class ShapeOpsTest(test.TestCase): self.assertLess(err, 1e-3) def testSqueezeWithUnknownShape(self): - with self.test_session(): + with self.cached_session(): a = array_ops.placeholder(dtypes.float32, shape=[2, None]) squeezed = array_ops.squeeze(a, [1]) @@ -433,7 +433,7 @@ class TileTest(test.TestCase): self.assertTrue((result == np.tile(inp, (1, 4))).all()) def testIdentityTileAndGrad(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 1).astype(np.float32) a = constant_op.constant(inp) tiled = array_ops.tile(a, [1, 1]) @@ -443,7 +443,7 @@ class TileTest(test.TestCase): self.assertTrue((result == np.tile(inp, (1, 1))).all()) def testEmpty(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(2, 3).astype(np.float32) a = constant_op.constant(inp) tiled = array_ops.tile(a, [5, 0]) @@ -453,7 +453,7 @@ class TileTest(test.TestCase): def testUnknownInputShape(self): """Importing can call _TileShape without shape of known.""" - with self.test_session(): + with self.cached_session(): inp = array_ops.placeholder(dtypes.float32) # unknown shape multiples = constant_op.constant([1, 2, 3, 4], dtype=np.int32) tiled = 
array_ops.tile(inp, multiples) @@ -503,7 +503,7 @@ class TileTest(test.TestCase): self.assertAllEqual(result, np.tile(inp, (1, 4))) def testInvalidDim(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 1).astype("f") a = constant_op.constant( [float(x) for x in inp.ravel(order="C")], @@ -546,7 +546,7 @@ class TileTest(test.TestCase): self._RunAndVerifyResult(10, use_gpu=True) def testGradientSimpleReduction(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 1).astype("f") a = constant_op.constant( [float(x) for x in inp.flatten()], shape=[4, 1], dtype=dtypes.float32) @@ -561,7 +561,7 @@ class TileTest(test.TestCase): self.assertAllClose(np.sum(grad_inp, axis=1).reshape(4, 1), result, 1e-3) def testGradientStridedReduction(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 2).astype("f") a = constant_op.constant( [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32) @@ -634,7 +634,7 @@ class TileTest(test.TestCase): self._RunAndVerifyGradientResult([2, 1, 3, 3, 2], [1, 3, 3, 1, 2]) def testGradientStridedReductionGC(self): - with self.test_session(): + with self.cached_session(): inp = np.random.rand(4, 2).astype("f") a = constant_op.constant( [float(x) for x in inp.flatten()], shape=[4, 2], dtype=dtypes.float32) @@ -647,7 +647,7 @@ class TileTest(test.TestCase): dtype=dtypes.float32) outputs = array_ops.gather(array_ops.tile(inputs, [3]), [1, 5, 9, 3, 7, 2, 2, 2]) - with self.test_session(): + with self.cached_session(): error = gradient_checker.compute_gradient_error( inputs, inputs.get_shape().as_list(), outputs, outputs.get_shape().as_list()) @@ -659,7 +659,7 @@ class TileTest(test.TestCase): inputs = array_ops.reshape(inputs, [-1, 1, 1]) outputs = array_ops.gather(array_ops.tile(inputs, [3, 4, 2]), [1, 5, 9, 3, 7, 2, 2, 2]) - with self.test_session(): + with self.cached_session(): error = gradient_checker.compute_gradient_error( 
inputs, inputs.get_shape().as_list(), outputs, outputs.get_shape().as_list()) diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py index 40d384c623..c08d3222b3 100644 --- a/tensorflow/python/kernel_tests/slice_op_test.py +++ b/tensorflow/python/kernel_tests/slice_op_test.py @@ -107,7 +107,7 @@ class SliceTest(test.TestCase): def testScalarInput(self): input_val = 0 - with self.test_session() as sess: + with self.cached_session() as sess: # Test with constant input; shape inference fails. with self.assertRaisesWithPredicateMatch(ValueError, "out of range"): constant_op.constant(input_val)[:].get_shape() @@ -121,7 +121,7 @@ class SliceTest(test.TestCase): def testInvalidIndex(self): input_val = [1, 2] - with self.test_session() as sess: + with self.cached_session() as sess: # Test with constant input; shape inference fails. with self.assertRaisesWithPredicateMatch(ValueError, "out of range"): constant_op.constant(input_val)[1:, 1:].get_shape() diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index fbf1adba9b..e53347c4bc 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -210,7 +210,7 @@ class SoftmaxTest(test.TestCase): self.assertEqual([3, 2, 4], op.get_shape()) def testEmptyInput(self): - with self.test_session(): + with self.cached_session(): x = array_ops.placeholder(dtypes.float32, shape=[0, 3]) self.assertEqual(0, array_ops.size(x).eval()) # reshape would raise if logits is empty @@ -218,7 +218,7 @@ class SoftmaxTest(test.TestCase): nn_ops.softmax(x, axis=0).eval() def testDimTooLarge(self): - with self.test_session(): + with self.cached_session(): # Use placeholder to make sure we get runtime error instead of shape # inference error. 
dim = array_ops.placeholder_with_default(100, shape=[]) diff --git a/tensorflow/python/kernel_tests/softplus_op_test.py b/tensorflow/python/kernel_tests/softplus_op_test.py index c0269db9ae..afe3df6178 100644 --- a/tensorflow/python/kernel_tests/softplus_op_test.py +++ b/tensorflow/python/kernel_tests/softplus_op_test.py @@ -72,7 +72,7 @@ class SoftplusTest(test.TestCase): use_gpu=True) def testGradient(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -88,7 +88,7 @@ class SoftplusTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGrad(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -105,7 +105,7 @@ class SoftplusTest(test.TestCase): self.assertLess(err, 5e-5) def testGradGradGrad(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -123,7 +123,7 @@ class SoftplusTest(test.TestCase): self.assertLess(err, 5e-5) def testNoInts(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp( errors.InvalidArgumentError, "No OpKernel was registered to support Op 'Softplus'"): diff --git a/tensorflow/python/kernel_tests/softsign_op_test.py b/tensorflow/python/kernel_tests/softsign_op_test.py index a5247ce08d..05a7c53dee 100644 --- a/tensorflow/python/kernel_tests/softsign_op_test.py +++ b/tensorflow/python/kernel_tests/softsign_op_test.py @@ -51,7 +51,7 @@ class SoftsignTest(test.TestCase): use_gpu=True) def testGradient(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], shape=[2, 5], @@ -67,7 +67,7 @@ class SoftsignTest(test.TestCase): self.assertLess(err, 1e-4) def testNoInts(self): - with 
self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp( errors.InvalidArgumentError, "No OpKernel was registered to support Op 'Softsign'"): diff --git a/tensorflow/python/kernel_tests/spacetobatch_op_test.py b/tensorflow/python/kernel_tests/spacetobatch_op_test.py index 2a9232b6ae..e267c05915 100644 --- a/tensorflow/python/kernel_tests/spacetobatch_op_test.py +++ b/tensorflow/python/kernel_tests/spacetobatch_op_test.py @@ -551,7 +551,7 @@ class SpaceToBatchNDGradientTest(test.TestCase): def _checkGrad(self, x, block_shape, paddings): block_shape = np.array(block_shape) paddings = np.array(paddings).reshape((len(block_shape), 2)) - with self.test_session(): + with self.cached_session(): tf_x = ops.convert_to_tensor(x) tf_y = array_ops.space_to_batch_nd(tf_x, block_shape, paddings) epsilon = 1e-5 @@ -638,7 +638,7 @@ class RequiredSpaceToBatchPaddingsTest(test.TestCase): t_paddings, t_crops = array_ops.required_space_to_batch_paddings( input_shape_placeholder, block_shape_placeholder, base_paddings_placeholder) - with self.test_session(): + with self.cached_session(): paddings_result = t_paddings.eval(assignments) crops_result = t_crops.eval(assignments) self.assertAllEqual(paddings_result, paddings_const) diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py index 3bb5e899fe..477720302d 100644 --- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py +++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py @@ -99,20 +99,20 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): """, q.accumulator_ref.op.node_def) def testAccumulatorSizeEmpty(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q") self.assertEqual(q.num_accumulated().eval(), 0) def testAccumulatorSetGlobalStep(self): - with self.test_session(): + 
with self.cached_session(): q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([1])) set_global_step_op = q.set_global_step(1) set_global_step_op.run() def testAccumulatorApplyGradFloat32(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) accum_op = q.apply_indexed_slices_grad( @@ -123,7 +123,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): self.assertEqual(q.num_accumulated().eval(), 1) def testDtypes(self): - with self.test_session() as sess: + with self.cached_session() as sess: dtypes = [dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64] for i in range(len(dtypes)): @@ -145,7 +145,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): self._assertEqual_nparray(sum_elems / len(elems), result, sess) def testAccumulatorMultipleAccumulators(self): - with self.test_session() as sess: + with self.cached_session() as sess: q_f32_0 = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2])) q_f32_1 = data_flow_ops.SparseConditionalAccumulator( @@ -175,7 +175,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): self._assertEqual_indexedslices(expected_tensors[i], result) def testAccumulatorTakeGradMean(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=()) @@ -220,7 +220,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): dtypes_lib.float32, name="Q", shape=(), reduction_type="Invalid") def testAccumulatorRepeatedTakeGrad(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=()) @@ -258,7 +258,7 @@ class 
IndexedSlicesConditionalAccumulatorTest(test.TestCase): self.assertAllEqual(val.dense_shape, [-1, 2]) def testParallelApplyGradMean(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2])) elems = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0] @@ -323,7 +323,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): val, sess) def testParallelTakeGrad(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2])) elems = [e + 1 for e in range(10)] @@ -362,7 +362,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): np.array([[0, 0], [elems[i], 0]]), results[i], sess) def testAccumulatorApplyAndBlockingTake(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([2, 2])) @@ -397,7 +397,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): sess.run(takeg_op) def testAccumulatorCancel(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", @@ -416,7 +416,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): takeg_thread.join() def testNonVectorIndices(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) @@ -428,7 +428,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): grad_values=np.array([1, 2]).astype(np.float32)).run() def testZeroDimensionValues(self): - with self.test_session(): + with self.cached_session(): q = 
data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) @@ -438,7 +438,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): grad_indices=[0], grad_values=np.array(1).astype(np.float32)).run() def testWrongNonEmptyInputValues(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) @@ -449,7 +449,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): grad_values=np.array([[0, 1, 1]]).astype(np.float32)).run() def testDynamicNonVectorIndices(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) @@ -468,7 +468,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): }) def testDynamicWrongNonEmptyInputValues(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) @@ -486,7 +486,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): }) def testEmptyShapeApply(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([])) @@ -511,7 +511,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): q.apply_grad(grad_indices=[0], grad_values=[1.0]).run() def testValidateShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=[2, 2, None]) @@ -606,7 +606,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): local_step=1).run() def testReturnShape(self): - with self.test_session() as sess: + with self.cached_session() 
as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=[2, None]) @@ -631,7 +631,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): self.assertAllEqual(val.dense_shape, [-1, 2, 2, 3]) def testApplyGradtInt32IndicesAndShape(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=tensor_shape.TensorShape([3, 3])) accum_op = q.apply_grad( diff --git a/tensorflow/python/kernel_tests/sparse_cross_op_test.py b/tensorflow/python/kernel_tests/sparse_cross_op_test.py index ca7898d466..6e0714da70 100644 --- a/tensorflow/python/kernel_tests/sparse_cross_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_cross_op_test.py @@ -42,7 +42,7 @@ class SparseCrossOpTest(test.TestCase): 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_dense(self): @@ -62,7 +62,7 @@ class SparseCrossOpTest(test.TestCase): 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_integer_mixed_string_sparse(self): @@ -76,7 +76,7 @@ class SparseCrossOpTest(test.TestCase): '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_integer_mixed_string_dense(self): @@ -94,7 +94,7 @@ class SparseCrossOpTest(test.TestCase): '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2', '999999_X_batch2-FC2-F1', 
'999999_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_sparse_cross_dense(self): @@ -111,7 +111,7 @@ class SparseCrossOpTest(test.TestCase): 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_integer_sparse_input(self): @@ -127,7 +127,7 @@ class SparseCrossOpTest(test.TestCase): '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_permutation_3x3x3(self): @@ -169,7 +169,7 @@ class SparseCrossOpTest(test.TestCase): 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2', 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_permutation_3x1x2(self): @@ -188,7 +188,7 @@ class SparseCrossOpTest(test.TestCase): 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1', 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_large_batch(self): @@ -221,7 +221,7 @@ class SparseCrossOpTest(test.TestCase): ]) expected_out = self._sparse_tensor(col_out) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_one_column_empty(self): @@ -234,7 +234,7 @@ class SparseCrossOpTest(test.TestCase): self._sparse_tensor([], 1), self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]) ]) - with 
self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_empty(sess.run(op)) def test_some_columns_empty(self): @@ -253,7 +253,7 @@ class SparseCrossOpTest(test.TestCase): 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1', 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2' ]], 2) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_all_columns_empty(self): @@ -266,7 +266,7 @@ class SparseCrossOpTest(test.TestCase): self._sparse_tensor([]), self._sparse_tensor([]) ]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_empty(sess.run(op)) def test_hashed_zero_bucket_no_hash_key(self): @@ -277,7 +277,7 @@ class SparseCrossOpTest(test.TestCase): ]) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[1971693436396284976]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_hashed_zero_bucket(self): @@ -290,7 +290,7 @@ class SparseCrossOpTest(test.TestCase): hash_key=sparse_ops._DEFAULT_HASH_KEY + 1) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[4847552627144134031]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed. @@ -304,7 +304,7 @@ class SparseCrossOpTest(test.TestCase): num_buckets=100) # Check actual hashed output to prevent unintentional hashing changes. 
expected_out = self._sparse_tensor([[83]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_hashed_output(self): @@ -318,7 +318,7 @@ class SparseCrossOpTest(test.TestCase): hash_key=sparse_ops._DEFAULT_HASH_KEY + 1) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[31]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_hashed__has_no_collision(self): @@ -344,7 +344,7 @@ class SparseCrossOpTest(test.TestCase): self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]) ], num_buckets=1000) - with self.test_session() as sess: + with self.cached_session() as sess: out = sess.run(op) self.assertEqual(6, len(out.values)) self.assertAllEqual([[0, i] for i in range(6)], out.indices) diff --git a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py index f50e39d6d5..90009fc33e 100644 --- a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py @@ -130,7 +130,7 @@ class MatMulGradientTest(test.TestCase): def _testGradients(self, tr_a, tr_b, sp_a, sp_b, a_dtype, b_dtype, delta, name): - with self.test_session(): + with self.cached_session(): a = constant_op.constant( RandMatrix( 3, 2, tr_a, round_bfloat=True), dtype=dtypes.float32) diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py index fc39de150e..79efee3f5b 100644 --- a/tensorflow/python/kernel_tests/sparse_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops_test.py @@ -628,7 +628,7 @@ class SparseReduceTest(test_util.TensorFlowTestCase): else: np_ans = np.max(np_ans, axis=ra, keepdims=keep_dims) - with self.test_session(): + with self.cached_session(): if do_sum: tf_dense_ans = 
sparse_ops.sparse_reduce_sum(sp_t, reduction_axes, keep_dims) diff --git a/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py b/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py index 87a4eb9c7b..c71746cc99 100644 --- a/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py +++ b/tensorflow/python/kernel_tests/sparse_to_dense_op_py_test.py @@ -81,7 +81,7 @@ class SparseToDenseTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) def testZeroDefault(self): - with self.test_session(): + with self.cached_session(): x = sparse_ops.sparse_to_dense(2, [4], 7).eval() self.assertAllEqual(x, [0, 0, 7, 0]) @@ -94,12 +94,12 @@ class SparseToDenseTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) def testBadShape(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesWithPredicateMatch(ValueError, "must be rank 1"): _SparseToDense([1, 3], [[5], [3]], 1, -1) def testBadValue(self): - with self.test_session(): + with self.cached_session(): dense = _SparseToDense([1, 3], [5], [[5], [3]], -1) with self.assertRaisesOpError( r"sparse_values has incorrect shape \[2,1\], " @@ -107,20 +107,20 @@ class SparseToDenseTest(test.TestCase): dense.eval() def testBadNumValues(self): - with self.test_session(): + with self.cached_session(): dense = _SparseToDense([1, 3], [5], [1, 2, 3], -1) with self.assertRaisesOpError( r"sparse_values has incorrect shape \[3\], should be \[\] or \[2\]"): dense.eval() def testBadDefault(self): - with self.test_session(): + with self.cached_session(): dense = _SparseToDense([1, 3], [5], [1, 2], [0]) with self.assertRaisesOpError("default_value should be a scalar"): dense.eval() def testOutOfBoundsIndicesWithWithoutValidation(self): - with self.test_session(): + with self.cached_session(): dense = _SparseToDense( sparse_indices=[[1], [10]], output_size=[5], @@ -140,7 +140,7 @@ class SparseToDenseTest(test.TestCase): dense_without_validation.eval() def 
testRepeatingIndicesWithWithoutValidation(self): - with self.test_session(): + with self.cached_session(): dense = _SparseToDense( sparse_indices=[[1], [1]], output_size=[5], @@ -158,7 +158,7 @@ class SparseToDenseTest(test.TestCase): dense_without_validation.eval() def testUnsortedIndicesWithWithoutValidation(self): - with self.test_session(): + with self.cached_session(): dense = _SparseToDense( sparse_indices=[[2], [1]], output_size=[5], diff --git a/tensorflow/python/kernel_tests/sparsemask_op_test.py b/tensorflow/python/kernel_tests/sparsemask_op_test.py index cf6c9494ae..6f5dd45b61 100644 --- a/tensorflow/python/kernel_tests/sparsemask_op_test.py +++ b/tensorflow/python/kernel_tests/sparsemask_op_test.py @@ -34,7 +34,7 @@ class SparseMaskTest(test.TestCase): out_values = values[1:, :] out_indices = np.array([2, 3, 4], dtype=np.int32) - with self.test_session() as sess: + with self.cached_session() as sess: values_tensor = ops.convert_to_tensor(values) indices_tensor = ops.convert_to_tensor(indices) mask_indices_tensor = ops.convert_to_tensor(mask_indices) diff --git a/tensorflow/python/kernel_tests/string_join_op_test.py b/tensorflow/python/kernel_tests/string_join_op_test.py index ce19333654..e4371ab5b9 100644 --- a/tensorflow/python/kernel_tests/string_join_op_test.py +++ b/tensorflow/python/kernel_tests/string_join_op_test.py @@ -28,7 +28,7 @@ class StringJoinOpTest(test.TestCase): input1 = "a" input2 = [["b"], ["c"]] - with self.test_session(): + with self.cached_session(): output = string_ops.string_join([input0, input1]) self.assertAllEqual(output.eval(), [b"aa", b"ba"]) diff --git a/tensorflow/python/kernel_tests/string_length_op_test.py b/tensorflow/python/kernel_tests/string_length_op_test.py index 075a3204ad..9f013c2c7e 100644 --- a/tensorflow/python/kernel_tests/string_length_op_test.py +++ b/tensorflow/python/kernel_tests/string_length_op_test.py @@ -27,7 +27,7 @@ class StringLengthOpTest(test.TestCase): def testStringLength(self): strings = 
[[["1", "12"], ["123", "1234"], ["12345", "123456"]]] - with self.test_session() as sess: + with self.cached_session() as sess: lengths = string_ops.string_length(strings) values = sess.run(lengths) self.assertAllEqual(values, [[[1, 2], [3, 4], [5, 6]]]) diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py index b6a0f45adc..b968e885ed 100644 --- a/tensorflow/python/kernel_tests/string_split_op_test.py +++ b/tensorflow/python/kernel_tests/string_split_op_test.py @@ -32,7 +32,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplit(self): strings = ["pigs on the wing", "animals"] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split(strings) indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]]) @@ -42,7 +42,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitEmptyDelimiter(self): strings = ["hello", "hola", b"\xF0\x9F\x98\x8E"] # Last string is U+1F60E - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split(strings, delimiter="") indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], @@ -60,7 +60,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitEmptyToken(self): strings = ["", " a", "b ", " c", " ", " d ", " e", "f ", " g ", " "] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split(strings) indices, values, shape = sess.run(tokens) self.assertAllEqual( @@ -72,7 +72,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitOnSetEmptyToken(self): strings = ["", " a", "b ", " c", " ", " d ", ". e", "f .", " .g. 
", " ."] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split(strings, delimiter=" .") indices, values, shape = sess.run(tokens) self.assertAllEqual( @@ -84,7 +84,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitWithDelimiter(self): strings = ["hello|world", "hello world"] - with self.test_session() as sess: + with self.cached_session() as sess: self.assertRaises( ValueError, string_ops.string_split, strings, delimiter=["|", ""]) @@ -106,7 +106,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitWithDelimiterTensor(self): strings = ["hello|world", "hello world"] - with self.test_session() as sess: + with self.cached_session() as sess: delimiter = array_ops.placeholder(dtypes.string) tokens = string_ops.string_split(strings, delimiter=delimiter) @@ -124,7 +124,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitWithDelimitersTensor(self): strings = ["hello.cruel,world", "hello cruel world"] - with self.test_session() as sess: + with self.cached_session() as sess: delimiter = array_ops.placeholder(dtypes.string) tokens = string_ops.string_split(strings, delimiter=delimiter) @@ -143,7 +143,7 @@ class StringSplitOpTest(test.TestCase): def testStringSplitWithNoSkipEmpty(self): strings = ["#a", "b#", "#c#"] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split(strings, "#", skip_empty=False) indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], @@ -152,7 +152,7 @@ class StringSplitOpTest(test.TestCase): self.assertAllEqual(values, [b"", b"a", b"b", b"", b"", b"c", b""]) self.assertAllEqual(shape, [3, 3]) - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split(strings, "#") indices, values, shape = sess.run(tokens) self.assertAllEqual(values, [b"a", b"b", b"c"]) @@ -165,7 +165,7 @@ class StringSplitV2OpTest(test.TestCase): def testSplitV2(self): 
strings = ["pigs on the wing", "animals"] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split_v2(strings) indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]]) @@ -180,7 +180,7 @@ class StringSplitV2OpTest(test.TestCase): # ['', '', '4', '5', '', '6', ''] strings = ["1<>2<>3", "<><>4<>5<><>6<>"] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split_v2(strings, sep="<>") indices, values, shape = sess.run(tokens) self.assertAllEqual( @@ -198,7 +198,7 @@ class StringSplitV2OpTest(test.TestCase): # ['1', '2', '', '3', ''] strings = ["1,2,3", "4,5,,6,"] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split_v2(strings, sep=',') indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], @@ -215,7 +215,7 @@ class StringSplitV2OpTest(test.TestCase): #['1', '2', '3'] strings = ["1 2 3", " 4 5 6 "] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split_v2(strings) indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], @@ -231,7 +231,7 @@ class StringSplitV2OpTest(test.TestCase): # ['4', '5,,6,'] strings = ["1,2,3", "4,5,,6,"] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1) indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], @@ -247,7 +247,7 @@ class StringSplitV2OpTest(test.TestCase): # ['4', '5 6 '] strings = ["1 2 3", " 4 5 6 "] - with self.test_session() as sess: + with self.cached_session() as sess: tokens = string_ops.string_split_v2(strings, maxsplit=1) indices, values, shape = sess.run(tokens) self.assertAllEqual(indices, [[0, 0], [0, 1], diff --git 
a/tensorflow/python/kernel_tests/string_strip_op_test.py b/tensorflow/python/kernel_tests/string_strip_op_test.py index 30fd477ff4..a96b71490e 100644 --- a/tensorflow/python/kernel_tests/string_strip_op_test.py +++ b/tensorflow/python/kernel_tests/string_strip_op_test.py @@ -28,7 +28,7 @@ class StringStripOpTest(test.TestCase): def test_string_strip(self): strings = ["pigs on the wing", "animals"] - with self.test_session() as sess: + with self.cached_session() as sess: output = string_ops.string_strip(strings) output = sess.run(output) self.assertAllEqual(output, [b"pigs on the wing", b"animals"]) @@ -37,7 +37,7 @@ class StringStripOpTest(test.TestCase): strings = [["pigs on the wing", "animals"], [" hello ", "\n\tworld \r \n"]] - with self.test_session() as sess: + with self.cached_session() as sess: output = string_ops.string_strip(strings) output = sess.run(output) self.assertAllEqual(output, [[b"pigs on the wing", b"animals"], @@ -46,7 +46,7 @@ class StringStripOpTest(test.TestCase): def test_string_strip_with_empty_strings(self): strings = [" hello ", "", "world ", " \t \r \n "] - with self.test_session() as sess: + with self.cached_session() as sess: output = string_ops.string_strip(strings) output = sess.run(output) self.assertAllEqual(output, [b"hello", b"", b"world", b""]) diff --git a/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py b/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py index 2c6064e64b..9cb0c9d18f 100644 --- a/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py +++ b/tensorflow/python/kernel_tests/string_to_hash_bucket_op_test.py @@ -27,7 +27,7 @@ from tensorflow.python.platform import test class StringToHashBucketOpTest(test.TestCase): def testStringToOneHashBucketFast(self): - with self.test_session(): + with self.cached_session(): input_string = array_ops.placeholder(dtypes.string) output = string_ops.string_to_hash_bucket_fast(input_string, 1) result = output.eval(feed_dict={input_string: ['a', 
'b', 'c']}) @@ -35,7 +35,7 @@ class StringToHashBucketOpTest(test.TestCase): self.assertAllEqual([0, 0, 0], result) def testStringToHashBucketsFast(self): - with self.test_session(): + with self.cached_session(): input_string = array_ops.placeholder(dtypes.string) output = string_ops.string_to_hash_bucket_fast(input_string, 10) result = output.eval(feed_dict={input_string: ['a', 'b', 'c', 'd']}) @@ -47,7 +47,7 @@ class StringToHashBucketOpTest(test.TestCase): self.assertAllEqual([9, 2, 2, 5], result) def testStringToOneHashBucketLegacyHash(self): - with self.test_session(): + with self.cached_session(): input_string = array_ops.placeholder(dtypes.string) output = string_ops.string_to_hash_bucket(input_string, 1) result = output.eval(feed_dict={input_string: ['a', 'b', 'c']}) @@ -55,7 +55,7 @@ class StringToHashBucketOpTest(test.TestCase): self.assertAllEqual([0, 0, 0], result) def testStringToHashBucketsLegacyHash(self): - with self.test_session(): + with self.cached_session(): input_string = array_ops.placeholder(dtypes.string) output = string_ops.string_to_hash_bucket(input_string, 10) result = output.eval(feed_dict={input_string: ['a', 'b', 'c']}) @@ -66,14 +66,14 @@ class StringToHashBucketOpTest(test.TestCase): self.assertAllEqual([8, 0, 7], result) def testStringToOneHashBucketStrongOneHashBucket(self): - with self.test_session(): + with self.cached_session(): input_string = constant_op.constant(['a', 'b', 'c']) output = string_ops.string_to_hash_bucket_strong( input_string, 1, key=[123, 345]) self.assertAllEqual([0, 0, 0], output.eval()) def testStringToHashBucketsStrong(self): - with self.test_session(): + with self.cached_session(): input_string = constant_op.constant(['a', 'b', 'c']) output = string_ops.string_to_hash_bucket_strong( input_string, 10, key=[98765, 132]) @@ -84,7 +84,7 @@ class StringToHashBucketOpTest(test.TestCase): self.assertAllEqual([4, 2, 8], output.eval()) def testStringToHashBucketsStrongInvalidKey(self): - with self.test_session(): 
+ with self.cached_session(): input_string = constant_op.constant(['a', 'b', 'c']) with self.assertRaisesOpError('Key must have 2 elements'): string_ops.string_to_hash_bucket_strong( diff --git a/tensorflow/python/kernel_tests/string_to_number_op_test.py b/tensorflow/python/kernel_tests/string_to_number_op_test.py index cc4c21b66c..99ee25e125 100644 --- a/tensorflow/python/kernel_tests/string_to_number_op_test.py +++ b/tensorflow/python/kernel_tests/string_to_number_op_test.py @@ -29,7 +29,7 @@ _ERROR_MESSAGE = "StringToNumberOp could not correctly convert string: " class StringToNumberOpTest(test.TestCase): def _test(self, tf_type, good_pairs, bad_pairs): - with self.test_session(): + with self.cached_session(): # Build a small testing graph. input_string = array_ops.placeholder(dtypes.string) output = parsing_ops.string_to_number( diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py index 753eac9c62..4d163a0f6f 100644 --- a/tensorflow/python/kernel_tests/substr_op_test.py +++ b/tensorflow/python/kernel_tests/substr_op_test.py @@ -35,7 +35,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = b"ell" substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -68,7 +68,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = b"y" substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -90,7 +90,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = [b"ell", b"orl"] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -118,7 +118,7 
@@ class SubstrOpTest(test.TestCase, parameterized.TestCase): [b"en", b"en", b"en"]] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -132,7 +132,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): [b"xteen", b"vente", b"hteen"]] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -147,7 +147,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = [[b"e", b"ev", b"lve"], [b"h", b"te", b"tee"], [b"i", b"te", b"hte"], [b"i", b"en", b"nty"]] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -158,7 +158,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = [[b"hir", b"en", b"t"], [b"e", b"ur", b"ift"], [b"ee", b"ee", b"ft"]] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -168,7 +168,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): length = np.array([3, 2, 1], dtype) expected_value = [b"hir", b"rt", b"n"] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -187,7 +187,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array(7, dtype) length = np.array(3, dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -205,7 
+205,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array(4, dtype) length = np.array(1, dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -214,7 +214,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array(-4, dtype) length = np.array(1, dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -224,7 +224,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 3]], dtype) length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -243,7 +243,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array([1, 2, 4], dtype) length = np.array([1, 2, 3], dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -294,7 +294,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): self._testMismatchPosLenShapes(dtype) def testWrongDtype(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(TypeError): string_ops.substr(b"test", 3.0, 1) with self.assertRaises(TypeError): diff --git a/tensorflow/python/kernel_tests/summary_ops_test.py b/tensorflow/python/kernel_tests/summary_ops_test.py index 2da7107f61..0c500120b0 100644 --- a/tensorflow/python/kernel_tests/summary_ops_test.py +++ 
b/tensorflow/python/kernel_tests/summary_ops_test.py @@ -34,7 +34,7 @@ class SummaryOpsTest(test.TestCase): return summ def testScalarSummary(self): - with self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant([10.0, 20.0]) summ = logging_ops.scalar_summary(["c1", "c2"], const, name="mysumm") value = sess.run(summ) @@ -45,7 +45,7 @@ class SummaryOpsTest(test.TestCase): """, self._AsSummary(value)) def testScalarSummaryDefaultName(self): - with self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant([10.0, 20.0]) summ = logging_ops.scalar_summary(["c1", "c2"], const) value = sess.run(summ) @@ -56,7 +56,7 @@ class SummaryOpsTest(test.TestCase): """, self._AsSummary(value)) def testMergeSummary(self): - with self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant(10.0) summ1 = summary.histogram("h", const) summ2 = logging_ops.scalar_summary("c", const) diff --git a/tensorflow/python/kernel_tests/summary_tensor_op_test.py b/tensorflow/python/kernel_tests/summary_tensor_op_test.py index d534aadb79..0f4643393a 100644 --- a/tensorflow/python/kernel_tests/summary_tensor_op_test.py +++ b/tensorflow/python/kernel_tests/summary_tensor_op_test.py @@ -42,7 +42,7 @@ class SummaryOpsTest(test.TestCase): self.assertTrue(np.array_equal(actual, expected)) def testTags(self): - with self.test_session() as sess: + with self.cached_session() as sess: c = constant_op.constant(1) s1 = summary_ops.tensor_summary("s1", c) with ops.name_scope("foo"): @@ -65,7 +65,7 @@ class SummaryOpsTest(test.TestCase): self.assertEqual(v4.tag, "foo/zod/TensorSummary") def testScalarSummary(self): - with self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant(10.0) summ = summary_ops.tensor_summary("foo", const) result = sess.run(summ) @@ -76,7 +76,7 @@ class SummaryOpsTest(test.TestCase): def testStringSummary(self): s = six.b("foobar") - with 
self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant(s) summ = summary_ops.tensor_summary("foo", const) result = sess.run(summ) @@ -86,7 +86,7 @@ class SummaryOpsTest(test.TestCase): self._AssertNumpyEq(n, s) def testManyScalarSummary(self): - with self.test_session() as sess: + with self.cached_session() as sess: const = array_ops.ones([5, 5, 5]) summ = summary_ops.tensor_summary("foo", const) result = sess.run(summ) @@ -96,7 +96,7 @@ class SummaryOpsTest(test.TestCase): def testManyStringSummary(self): strings = [[six.b("foo bar"), six.b("baz")], [six.b("zoink"), six.b("zod")]] - with self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant(strings) summ = summary_ops.tensor_summary("foo", const) result = sess.run(summ) @@ -106,7 +106,7 @@ class SummaryOpsTest(test.TestCase): def testManyBools(self): bools = [True, True, True, False, False, False] - with self.test_session() as sess: + with self.cached_session() as sess: const = constant_op.constant(bools) summ = summary_ops.tensor_summary("foo", const) result = sess.run(summ) @@ -116,7 +116,7 @@ class SummaryOpsTest(test.TestCase): self._AssertNumpyEq(n, bools) def testSummaryDescriptionAndDisplayName(self): - with self.test_session() as sess: + with self.cached_session() as sess: def get_description(summary_op): summ_str = sess.run(summary_op) diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py index 8ad29afd0a..d8d76440f1 100644 --- a/tensorflow/python/kernel_tests/tensordot_op_test.py +++ b/tensorflow/python/kernel_tests/tensordot_op_test.py @@ -48,7 +48,7 @@ class TensordotTest(test_lib.TestCase): with self.assertRaises(ValueError): math_ops.tensordot(a, b, (a_axes, b_axes)) # Invalid dynamic shapes. 
- with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "Matrix size-incompatible"): a_ph = array_ops.placeholder(dtypes.float32) @@ -80,7 +80,7 @@ class TensordotTest(test_lib.TestCase): output = math_ops.tensordot(a_ph, b_ph, axes_ph) # Note: We don't support scalar Tensor values for axes. for axes_value in 1, [1], [0, 1], [[1]], [[0, 1]], [[0], [7]]: - with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaises(errors_impl.InvalidArgumentError): _ = sess.run( [output], feed_dict={ @@ -92,7 +92,7 @@ class TensordotTest(test_lib.TestCase): # Test case for 11950 def test_valid_axis(self): for axes_value in [1, 2], [[1], [2]], [[], []], 0: - with self.test_session() as sess: + with self.cached_session(): np_a = np.ones((3, 3)) np_b = np.array([2, 3, 1])[None, None] np_ans = np.tensordot(np_a, np_b, axes_value) diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py index 290200ce45..f42800226e 100644 --- a/tensorflow/python/kernel_tests/transpose_op_test.py +++ b/tensorflow/python/kernel_tests/transpose_op_test.py @@ -451,13 +451,13 @@ class TransposeTest(test.TestCase): array_ops.transpose(array_ops.placeholder(dtypes.int32)).get_shape()) def testNullTensor(self): - with self.test_session(): + with self.cached_session(): x = constant_op.constant([], dtype=dtypes.float32, shape=[1, 4, 0]) xt = array_ops.transpose(x, [0, 2, 1]).eval() self.assertAllEqual(xt.shape, (1, 0, 4)) def _testError(self, x, p, err): - with self.test_session(): + with self.cached_session(): with self.assertRaisesOpError(err): array_ops.transpose(x, p).eval() diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index bbc040dc13..316570e13e 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ 
-30,7 +30,7 @@ class UniqueTest(test.TestCase): def testInt32(self): x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: + with self.cached_session() as sess: y, idx = array_ops.unique(x) tf_y, tf_idx = sess.run([y, idx]) @@ -41,7 +41,7 @@ class UniqueTest(test.TestCase): def testInt32OutIdxInt64(self): x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: + with self.cached_session() as sess: y, idx = array_ops.unique(x, out_idx=dtypes.int64) tf_y, tf_idx = sess.run([y, idx]) @@ -53,7 +53,7 @@ class UniqueTest(test.TestCase): def testString(self): indx = np.random.randint(65, high=122, size=7000) x = [chr(i) for i in indx] - with self.test_session() as sess: + with self.cached_session() as sess: y, idx = array_ops.unique(x) tf_y, tf_idx = sess.run([y, idx]) @@ -65,7 +65,7 @@ class UniqueTest(test.TestCase): def testInt32Axis(self): for dtype in [np.int32, np.int64]: x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) - with self.test_session() as sess: + with self.cached_session() as sess: y0, idx0 = gen_array_ops.unique_v2(x, axis=np.array([0], dtype)) tf_y0, tf_idx0 = sess.run([y0, idx0]) y1, idx1 = gen_array_ops.unique_v2(x, axis=np.array([1], dtype)) @@ -79,7 +79,7 @@ class UniqueTest(test.TestCase): # This test is only temporary, once V2 is used # by default, the axis will be wrapped to allow `axis=None`. 
x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: + with self.cached_session() as sess: y, idx = gen_array_ops.unique_v2(x, axis=np.array([], np.int32)) tf_y, tf_idx = sess.run([y, idx]) @@ -93,7 +93,7 @@ class UniqueWithCountsTest(test.TestCase): def testInt32(self): x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: + with self.cached_session() as sess: y, idx, count = array_ops.unique_with_counts(x) tf_y, tf_idx, tf_count = sess.run([y, idx, count]) @@ -106,7 +106,7 @@ class UniqueWithCountsTest(test.TestCase): def testInt32OutIdxInt64(self): x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: + with self.cached_session() as sess: y, idx, count = array_ops.unique_with_counts(x, out_idx=dtypes.int64) tf_y, tf_idx, tf_count = sess.run([y, idx, count]) @@ -121,7 +121,7 @@ class UniqueWithCountsTest(test.TestCase): indx = np.random.randint(65, high=122, size=7000) x = [chr(i) for i in indx] - with self.test_session() as sess: + with self.cached_session() as sess: y, idx, count = array_ops.unique_with_counts(x) tf_y, tf_idx, tf_count = sess.run([y, idx, count]) @@ -136,7 +136,7 @@ class UniqueWithCountsTest(test.TestCase): def testInt32Axis(self): for dtype in [np.int32, np.int64]: x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) - with self.test_session() as sess: + with self.cached_session() as sess: y0, idx0, count0 = gen_array_ops.unique_with_counts_v2( x, axis=np.array([0], dtype)) tf_y0, tf_idx0, tf_count0 = sess.run([y0, idx0, count0]) @@ -154,7 +154,7 @@ class UniqueWithCountsTest(test.TestCase): # This test is only temporary, once V2 is used # by default, the axis will be wrapped to allow `axis=None`. 
x = np.random.randint(2, high=10, size=7000) - with self.test_session() as sess: + with self.cached_session() as sess: y, idx, count = gen_array_ops.unique_with_counts_v2( x, axis=np.array([], np.int32)) tf_y, tf_idx, tf_count = sess.run([y, idx, count]) diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py index 1ee6e0866a..b373c419b6 100644 --- a/tensorflow/python/kernel_tests/unstack_op_test.py +++ b/tensorflow/python/kernel_tests/unstack_op_test.py @@ -99,7 +99,7 @@ class UnstackOpTest(test.TestCase): self.assertLess(err, 1e-6) def testInferNum(self): - with self.test_session(): + with self.cached_session(): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): x = array_ops.placeholder(np.float32, shape=shape) cs = array_ops.unstack(x) @@ -131,13 +131,13 @@ class UnstackOpTest(test.TestCase): for j in range(-i, i): expected = np_split_squeeze(a, j) - with self.test_session() as sess: + with self.cached_session() as sess: actual_unstack = sess.run(array_ops.unstack(a, axis=j)) self.assertAllEqual(expected, actual_unstack) def testAxis0Default(self): - with self.test_session() as sess: + with self.cached_session() as sess: a = constant_op.constant([[1, 2, 3], [4, 5, 6]], name='a') unstacked = sess.run(array_ops.unstack(a)) @@ -156,7 +156,7 @@ class UnstackOpTest(test.TestCase): array_ops.unstack(a, axis=-3) def testZeroLengthDim(self): - with self.test_session(): + with self.cached_session(): x = array_ops.zeros(shape=(0, 1, 2)) y = array_ops.unstack(x, axis=1)[0].eval() self.assertEqual(y.shape, (0, 2)) diff --git a/tensorflow/python/kernel_tests/variable_ops_test.py b/tensorflow/python/kernel_tests/variable_ops_test.py index cf369c0718..3d2f8b6155 100644 --- a/tensorflow/python/kernel_tests/variable_ops_test.py +++ b/tensorflow/python/kernel_tests/variable_ops_test.py @@ -118,7 +118,7 @@ class VariableOpTest(test.TestCase): self.assertEqual(tensor_shape.unknown_shape(), assigned.get_shape()) def 
testAssignNoShape(self): - with self.test_session(): + with self.cached_session(): value = self._NewShapelessTensor() var = state_ops.variable_op([1, 2], dtypes.float32, set_shape=False) self.assertEqual(tensor_shape.unknown_shape(), var.get_shape()) @@ -126,7 +126,7 @@ class VariableOpTest(test.TestCase): state_ops.assign(var, value).get_shape()) def testAssignNoShapeNoValidateShape(self): - with self.test_session(): + with self.cached_session(): value = self._NewShapelessTensor() var = state_ops.variable_op([1, 2], dtypes.float32, set_shape=False) self.assertEqual(tensor_shape.unknown_shape(), var.get_shape()) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index d57b79cb90..401e1ae102 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -113,7 +113,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(w.constraint, constraint) def testStringDefaultInitializer(self): - with self.test_session(): + with self.cached_session(): v = variable_scope.get_variable("string", shape=[], dtype=dtypes.string) variables_lib.global_variables_initializer().run() self.assertAllEqual(compat.as_bytes(v.eval()), b"") @@ -263,7 +263,7 @@ class VariableScopeTest(test.TestCase): # TODO(alive): support variable partitioning/caching in eager mode. 
def testVarScopeCachingDevice(self): - with self.test_session(): + with self.cached_session(): caching_device = "/job:moo" with variable_scope.variable_scope("tower"): with variable_scope.variable_scope( @@ -367,7 +367,7 @@ class VariableScopeTest(test.TestCase): variable_scope.get_variable("s", initializer=init, dtype=dtypes.float64) def testControlDeps(self): - with self.test_session() as sess: + with self.cached_session() as sess: v0 = variable_scope.get_variable( "v0", [1], initializer=init_ops.constant_initializer(0)) with ops.control_dependencies([v0.value()]): @@ -403,7 +403,7 @@ class VariableScopeTest(test.TestCase): variable_scope._DEFAULT_USE_RESOURCE = old def testControlFlow(self): - with self.test_session() as sess: + with self.cached_session() as sess: v0 = variable_scope.get_variable( "v0", [], initializer=init_ops.constant_initializer(0)) var_dict = {} @@ -513,7 +513,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "testVarScopeNameScope3/scope2/") def testVarScopeOriginalNameScope(self): - with self.test_session(): + with self.cached_session(): with ops.name_scope("scope1"): with variable_scope.variable_scope("tower") as tower: self.assertEqual(tower.original_name_scope, "scope1/tower/") @@ -536,7 +536,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc3, "scope1/tower/bar_1/") def testVarScopeObjectReuse(self): - with self.test_session(): + with self.cached_session(): vs = None with variable_scope.variable_scope("jump", reuse=True) as scope: vs = scope @@ -563,7 +563,7 @@ class VariableScopeTest(test.TestCase): self.assertFalse(jump_no_reuse.reuse) def testVarScopeGetOrCreateReuse(self): - with self.test_session(): + with self.cached_session(): def test_value(value): x = constant_op.constant(value) @@ -582,7 +582,7 @@ class VariableScopeTest(test.TestCase): test_value(17.) 
def testVarOpScope(self): - with self.test_session(): + with self.cached_session(): with ops.name_scope("testVarOpScope1"): with variable_scope.variable_scope("tower", "default", []): self.assertEqual( @@ -608,7 +608,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "testVarOpScope2/default_1/testVarOpScope2/") def testVarOpScopeUniqueNamesInterleavedSubstringScopes(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope(None, "defaultScope1"): with variable_scope.variable_scope(None, "layer"): self.assertEqual( @@ -631,7 +631,7 @@ class VariableScopeTest(test.TestCase): "defaultScope1_2/layer/w:0") def testVarOpScopeUniqueNamesWithJump(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("default") as default: with variable_scope.variable_scope(None, "layer"): self.assertEqual( @@ -647,7 +647,7 @@ class VariableScopeTest(test.TestCase): variable_scope.get_variable("w", []).name, "default/layer_2/w:0") def testVarOpScopeReuse(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer") as outer: with variable_scope.variable_scope("tower", "default", []): self.assertEqual( @@ -673,7 +673,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "outer_1/default/scope2/") def testVarScopeGetVar(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("root"): with variable_scope.variable_scope("towerA") as tower_a: va = variable_scope.get_variable("v", [1]) @@ -719,7 +719,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual("dtype" in str(exc.exception), True) def testVarScopeOuterScope(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer") as outer: pass with variable_scope.variable_scope(outer): @@ -743,7 +743,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, 
"outer_2/default/scope2/") def testVarScopeNestedOuterScope(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer") as outer: with variable_scope.variable_scope(outer): self.assertEqual( @@ -768,7 +768,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "outer/default_1/scope2/") def testVarOpScopeReuseParam(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer") as outer: with variable_scope.variable_scope("tower", "default", []): self.assertEqual( @@ -795,14 +795,14 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "outer_1/default/scope2/") def testVarOpScopeReuseError(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): with variable_scope.variable_scope(None, "default", reuse=True): self.assertEqual( variable_scope.get_variable("w", []).name, "outer/tower/w:0") def testVarOpScopeOuterScope(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer") as outer: pass with variable_scope.variable_scope(outer, "default", []): @@ -827,7 +827,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "outer_2/default/scope2/") def testVarOpScopeNestedOuterScope(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer") as outer: with variable_scope.variable_scope(outer, "default", []): self.assertEqual( @@ -851,7 +851,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(sc2, "outer_1/default/scope2/") def testBasicWhenAuxiliaryNameScopeIsFalse(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope( "scope", auxiliary_name_scope=False) as scope: self.assertEqual(scope.original_name_scope, "") @@ -886,7 +886,7 @@ class VariableScopeTest(test.TestCase): constant_op.constant([], name="c").name, "outer/inner/c:0") def 
testCreatedByDefaultNameWhenAuxiliaryNameScopeIsFalse(self): - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope( None, default_name="default", auxiliary_name_scope=False) as scope: self.assertEqual(scope.original_name_scope, "") @@ -910,7 +910,7 @@ class VariableScopeTest(test.TestCase): constant_op.constant([], name="c").name, "outer/default/c:0") def testReenterRootScopeWhenAuxiliaryNameScopeIsFalse(self): - with self.test_session(): + with self.cached_session(): root_scope = variable_scope.get_variable_scope() with variable_scope.variable_scope( root_scope, auxiliary_name_scope=False) as scope: @@ -927,7 +927,7 @@ class VariableScopeTest(test.TestCase): constant_op.constant([], name="c1").name, "outer/c1:0") def testAuxiliaryNameScopeIsInvalid(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(TypeError, "auxiliary_name_scope"): with variable_scope.variable_scope( None, default_name="scope", auxiliary_name_scope="invalid"): @@ -947,7 +947,7 @@ class VariableScopeTest(test.TestCase): def testReuseScopeWithoutNameScopeCollision(self): # Github issue: #13429 - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope("outer"): with variable_scope.variable_scope("inner") as inner: pass @@ -1021,7 +1021,7 @@ class VariableScopeTest(test.TestCase): self.assertEqual(varname_type[1], ("y", dtypes.int64)) def testGetCollection(self): - with self.test_session(): + with self.cached_session(): _ = variable_scope.get_variable("testGetCollection_a", []) _ = variable_scope.get_variable( "testGetCollection_b", [], trainable=False) @@ -1075,7 +1075,7 @@ class VariableScopeTest(test.TestCase): ]) def testGetTrainableVariablesWithGetVariable(self): - with self.test_session(): + with self.cached_session(): _ = variable_scope.get_variable("testGetTrainableVariables_a", []) with variable_scope.variable_scope( "testGetTrainableVariables_foo") as scope: @@ -1111,7 
+1111,7 @@ class VariableScopeTest(test.TestCase): trainable=True) def testGetTrainableVariablesWithVariable(self): - with self.test_session(): + with self.cached_session(): _ = variable_scope.variable(1.0, name="testGetTrainableVariables_a") with variable_scope.variable_scope( "testGetTrainableVariables_foo") as scope: @@ -1150,7 +1150,7 @@ class VariableScopeTest(test.TestCase): trainable=True) def testGetGlobalVariables(self): - with self.test_session(): + with self.cached_session(): _ = variable_scope.get_variable("testGetGlobalVariables_a", []) with variable_scope.variable_scope("testGetGlobalVariables_foo") as scope: _ = variable_scope.get_variable("testGetGlobalVariables_b", []) @@ -1160,7 +1160,7 @@ class VariableScopeTest(test.TestCase): "testGetGlobalVariables_b:0"]) def testGetLocalVariables(self): - with self.test_session(): + with self.cached_session(): _ = variable_scope.get_variable( "a", [], collections=[ops.GraphKeys.LOCAL_VARIABLES]) with variable_scope.variable_scope("foo") as scope: @@ -1396,7 +1396,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase): self.assertEqual("scope/v/0:0", true_vars[0].name) self.assertEqual("scope/v/1:0", true_vars[1].name) self.assertEqual("custom_getter/add:0", v.name) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() np_vars, np_v = sess.run([true_vars, v]) self.assertAllClose(np_v, sum(np_vars)) @@ -1436,7 +1436,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase): self.assertEqual(template % (1, 1, 0), true_vars[6].name) self.assertEqual(template % (1, 1, 1), true_vars[7].name) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() np_vars, np_v = sess.run([true_vars, v]) # take products of sums of products diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index 2b9c62ad6f..2e7975667c 100644 
--- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -42,7 +42,7 @@ from tensorflow.python.util import compat class VariablesTestCase(test.TestCase): def testInitialization(self): - with self.test_session(): + with self.cached_session(): var0 = variables.Variable(0.0) self.assertEqual("Variable:0", var0.name) self.assertEqual("Variable", var0._shared_name) @@ -69,7 +69,7 @@ class VariablesTestCase(test.TestCase): self.assertAllClose(1.1, var1.eval()) def testInitializationOrder(self): - with self.test_session(): + with self.cached_session(): rnd = variables.Variable(random_ops.random_uniform([3, 6]), name="rnd") self.assertEqual("rnd:0", rnd.name) self.assertEqual([3, 6], rnd.get_shape()) @@ -106,7 +106,7 @@ class VariablesTestCase(test.TestCase): pass def testAssignments(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(0.0) plus_one = var.assign_add(1.0) minus_one = var.assign_sub(2.0) @@ -142,7 +142,7 @@ class VariablesTestCase(test.TestCase): self.assertAllClose(4.0, var.eval()) def testZeroSizeStringAssign(self): - with self.test_session() as sess: + with self.cached_session() as sess: array = variables.Variable( initial_value=array_ops.zeros((0,), dtype=dtypes.string), name="foo", @@ -154,7 +154,7 @@ class VariablesTestCase(test.TestCase): self.assertEqual([], list(sess.run(copy_op))) def _countUpToTest(self, dtype): - with self.test_session(): + with self.cached_session(): zero = constant_op.constant(0, dtype=dtype) var = variables.Variable(zero) count_up_to = var.count_up_to(3) @@ -186,7 +186,7 @@ class VariablesTestCase(test.TestCase): self._countUpToTest(dtypes.int64) def testControlDepsNone(self): - with self.test_session(): + with self.cached_session(): c = constant_op.constant(1.0) with ops.control_dependencies([c]): # d get the control dep. 
@@ -199,7 +199,7 @@ class VariablesTestCase(test.TestCase): self.assertEqual([], var_x._ref().op.control_inputs) # pylint: disable=protected-access def testControlFlow(self): - with self.test_session() as sess: + with self.cached_session() as sess: v0 = variables.Variable(0, name="v0") var_dict = {} @@ -248,7 +248,7 @@ class VariablesTestCase(test.TestCase): control_flow_ops.while_loop(cond, body, [0, 0]) def testUseVariableAsTensor(self): - with self.test_session(): + with self.cached_session(): var_x = variables.Variable(2.0) var_y = variables.Variable(3.0) variables.global_variables_initializer().run() @@ -257,7 +257,7 @@ class VariablesTestCase(test.TestCase): self.assertAllClose(5.0, math_ops.add(var_x, var_y).eval()) def testZeroSizeVarSameAsConst(self): - with self.test_session(): + with self.cached_session(): zero_size_var = variables.Variable(array_ops.zeros([0, 2])) zero_size_const = array_ops.ones([2, 0]) variable_mul = math_ops.matmul(zero_size_const, zero_size_var) @@ -269,7 +269,7 @@ class VariablesTestCase(test.TestCase): self.assertAllClose([[0., 0.], [0., 0.]], variable_output) def testCachingDevice(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(2.0) self.assertEqual(var.device, var.value().device) self.assertEqual(var.device, var.initialized_value().device) @@ -279,7 +279,7 @@ class VariablesTestCase(test.TestCase): self.assertTrue(var_cached.value().device.startswith("/job:foo")) def testCollections(self): - with self.test_session(): + with self.cached_session(): var_x = variables.Variable(2.0) var_y = variables.Variable(2.0, trainable=False) var_z = variables.Variable(2.0, trainable=True) @@ -294,7 +294,7 @@ class VariablesTestCase(test.TestCase): self.assertEqual([var_x, var_z, var_t], variables.trainable_variables()) def testCollectionsWithScope(self): - with self.test_session(): + with self.cached_session(): with ops.name_scope("scope_1"): var_x = variables.Variable(2.0) with 
ops.name_scope("scope_2"): @@ -309,7 +309,7 @@ class VariablesTestCase(test.TestCase): self.assertEqual([var_y], variables.trainable_variables("scope_2")) def testOperators(self): - with self.test_session(): + with self.cached_session(): var_f = variables.Variable([2.0]) add = var_f + 0.0 radd = 1.0 + var_f @@ -382,13 +382,13 @@ class VariablesTestCase(test.TestCase): self.assertAllClose([[20.0, 30.0], [40.0, 60.0]], rmatmul.eval()) def testSession(self): - with self.test_session() as sess: + with self.cached_session() as sess: var = variables.Variable([1, 12]) variables.global_variables_initializer().run() self.assertAllClose([1, 12], sess.run(var)) def testDevicePlacement(self): - with self.test_session() as sess: + with self.cached_session() as sess: with ops.device("/cpu:0"): var = variables.Variable([1, 12]) init_value = var.initialized_value() @@ -408,7 +408,7 @@ class VariablesTestCase(test.TestCase): def testInitializerFunction(self): value = [[-42], [133.7]] shape = [2, 1] - with self.test_session(): + with self.cached_session(): initializer = lambda: constant_op.constant(value) v1 = variables.Variable(initializer, dtype=dtypes.float32) @@ -443,7 +443,7 @@ class VariablesTestCase(test.TestCase): constraint=constraint) def testNoRefDataRace(self): - with self.test_session(): + with self.cached_session(): a = variables.Variable([1, 2, 3], dtype=dtypes.float32) b = variables.Variable(a.initialized_value() + 2) c = variables.Variable(b.initialized_value() + 2) @@ -453,7 +453,7 @@ class VariablesTestCase(test.TestCase): self.assertAllEqual(c.eval(), [5, 6, 7]) def testInitializerFunctionDevicePlacement(self): - with self.test_session(): + with self.cached_session(): initializer = lambda: constant_op.constant(42.0) with ops.device("/cpu:100"): v1 = variables.Variable(initializer, dtype=dtypes.float32, name="v1") @@ -471,11 +471,11 @@ class VariablesTestCase(test.TestCase): self.assertEqual(expected_group_v2, i.op.colocation_groups()) def 
testVariableDefInitializedInstances(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: v_def = variables.Variable( initial_value=constant_op.constant(3.0)).to_proto() - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: # v describes a VariableDef-based variable without an initial value. v = variables.Variable(variable_def=v_def) self.assertEqual(3.0, sess.run(v.initialized_value())) @@ -486,7 +486,7 @@ class VariablesTestCase(test.TestCase): self.assertEqual(1.0, v.initialized_value().eval()) v_def.ClearField("initial_value_name") - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: # Restoring a legacy VariableDef proto that does not have # initial_value_name set should still work. v = variables.Variable(variable_def=v_def) @@ -514,7 +514,7 @@ class VariablesTestCase(test.TestCase): .trainable) def testLoad(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(np.zeros((5, 5), np.float32)) variables.global_variables_initializer().run() var.load(np.ones((5, 5), np.float32)) @@ -540,12 +540,12 @@ class VariablesTestCase(test.TestCase): class IsInitializedTest(test.TestCase): def testNoVars(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: uninited = variables.report_uninitialized_variables() self.assertEqual(0, sess.run(uninited).size) def testAssertVariablesInitialized(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: v = variables.Variable([1, 2], name="v") w = variables.Variable([3, 4], name="w") _ = v, w @@ -555,7 +555,7 @@ class IsInitializedTest(test.TestCase): self.assertEqual(0, sess.run(uninited).size) def testVariableList(self): - 
with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: v = variables.Variable([1, 2], name="v") w = variables.Variable([3, 4], name="w") uninited = variables.report_uninitialized_variables() @@ -566,14 +566,14 @@ class IsInitializedTest(test.TestCase): self.assertEqual(0, sess.run(uninited).size) def testZeroSizeVarInitialized(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: v = variables.Variable(array_ops.zeros([0, 2]), name="v") uninited = variables.report_uninitialized_variables() v.initializer.run() # not strictly necessary self.assertEqual(0, sess.run(uninited).size) def testTrainingWithZeroSizeVar(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: a = variables.Variable(array_ops.zeros([0, 2])) b = variables.Variable(array_ops.ones([2, 2])) objective = math_ops.reduce_sum(b + math_ops.matmul( @@ -592,7 +592,7 @@ class ObsoleteIsInitializedTest(test.TestCase): self.assertEqual(None, variables.assert_variables_initialized()) def testVariables(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: v = variables.Variable([1, 2]) w = variables.Variable([3, 4]) _ = v, w @@ -603,7 +603,7 @@ class ObsoleteIsInitializedTest(test.TestCase): sess.run(inited) def testVariableList(self): - with ops.Graph().as_default(), self.test_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: v = variables.Variable([1, 2]) w = variables.Variable([3, 4]) inited = variables.assert_variables_initialized([v]) diff --git a/tensorflow/python/kernel_tests/weights_broadcast_test.py b/tensorflow/python/kernel_tests/weights_broadcast_test.py index eda2856e0b..85f9abc69f 100644 --- a/tensorflow/python/kernel_tests/weights_broadcast_test.py +++ 
b/tensorflow/python/kernel_tests/weights_broadcast_test.py @@ -44,7 +44,7 @@ class AssertBroadcastableTest(test.TestCase): values_placeholder = array_ops.placeholder(dtypes_lib.float32) dynamic_op = weights_broadcast_ops.assert_broadcastable( weights=weights_placeholder, values=values_placeholder) - with self.test_session(): + with self.cached_session(): static_op.run() dynamic_op.run(feed_dict={ weights_placeholder: weights, @@ -100,7 +100,7 @@ class AssertBroadcastableTest(test.TestCase): values_placeholder = array_ops.placeholder(dtypes_lib.float32) dynamic_op = weights_broadcast_ops.assert_broadcastable( weights=weights_placeholder, values=values_placeholder) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.OpError, error_msg): dynamic_op.run(feed_dict={ weights_placeholder: weights, @@ -157,7 +157,7 @@ class BroadcastWeightsTest(test.TestCase): values_placeholder = array_ops.placeholder(dtypes_lib.float32) dynamic_op = weights_broadcast_ops.broadcast_weights( weights=weights_placeholder, values=values_placeholder) - with self.test_session(): + with self.cached_session(): self.assertAllEqual(expected, static_op.eval()) self.assertAllEqual(expected, dynamic_op.eval(feed_dict={ weights_placeholder: weights, @@ -227,7 +227,7 @@ class BroadcastWeightsTest(test.TestCase): values_placeholder = array_ops.placeholder(dtypes_lib.float32) dynamic_op = weights_broadcast_ops.broadcast_weights( weights=weights_placeholder, values=values_placeholder) - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(errors_impl.OpError, error_msg): dynamic_op.eval(feed_dict={ weights_placeholder: weights, diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index 60c726d54c..729885169e 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -153,13 +153,13 @@ class XentTest(test.TestCase): 
self.assertAllCloseAccordingToType(np_backprop, tf_backprop) def testShapeMismatch(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): gen_nn_ops.softmax_cross_entropy_with_logits( [[0., 1.], [2., 3.]], [[0., 1., 0.], [1., 0., 0.]]) def testNotMatrix(self): - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): gen_nn_ops.softmax_cross_entropy_with_logits([0., 1., 2., 3.], [0., 1., 0., 1.]) @@ -180,7 +180,7 @@ class XentTest(test.TestCase): np.array([[0., 0., 0., 1.], [0., .5, .5, 0.]]).astype(np.float64)) def testGradient(self): - with self.test_session() as sess: + with self.cached_session() as sess: l = constant_op.constant( [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5], shape=[3, 4], @@ -207,7 +207,7 @@ class XentTest(test.TestCase): self.assertLess(err, 5e-8) def testGradientLabelWithV2(self): - with self.test_session(): + with self.cached_session(): l = constant_op.constant( [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.5], shape=[3, 4], @@ -225,7 +225,7 @@ class XentTest(test.TestCase): self.assertLess(err, 5e-8) def testSecondGradient(self): - with self.test_session() as sess: + with self.cached_session() as sess: l = constant_op.constant( [ 0.0, 0.0, 1.0 / 3, 0.0, 1.0 / 3, 0.0, 0.0, 0.0, 0.0, 0.5 / 3, 0.0, -- GitLab From 7f3938deb393f7688cd364b630afdd9338460299 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 00:33:03 -0700 Subject: [PATCH 0121/1357] [TF] Update strings to run on device:CPU /cpu is an old style and can be misleading for new people trying to specify other devices. 
Also correct comparison in tensorflow/python/client/timeline_test.py PiperOrigin-RevId: 212769480 --- tensorflow/python/client/timeline_test.py | 2 +- tensorflow/python/framework/test_util.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index c046e9cfd4..03effde098 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -161,7 +161,7 @@ class TimelineTest(test.TestCase): cpu_max = maximums[ 'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname] # At least num1 + num2, both float32s (4 bytes each) - self.assertGreater(cpu_max.num_bytes, 8) + self.assertGreaterEqual(cpu_max.num_bytes, 8) self.assertGreater(cpu_max.timestamp, 0) self.assertTrue('num1' in cpu_max.tensors or 'num1/read' in cpu_max.tensors) self.assertTrue('num2' in cpu_max.tensors or 'num2/read' in cpu_max.tensors) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 1cc3bb4628..b7398238f5 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -779,7 +779,7 @@ def run_in_graph_and_eager_modes(func=None, def run_eagerly(self, **kwargs): if not use_gpu: - with ops.device("/cpu:0"): + with ops.device("/device:CPU:0"): f(self, **kwargs) else: f(self, **kwargs) @@ -1839,7 +1839,7 @@ class TensorFlowTestCase(googletest.TestCase): elif use_gpu: yield sess else: - with sess.graph.device("/cpu:0"): + with sess.graph.device("/device:CPU:0"): yield sess def _create_session(self, graph, config, force_gpu): @@ -1854,12 +1854,18 @@ class TensorFlowTestCase(googletest.TestCase): Returns: A config_pb2.ConfigProto object. """ + # TODO(b/114333779): Enforce allow_soft_placement=False when + # use_gpu=False. Currently many tests rely on the fact that any device + # will be used even when a specific device is supposed to be used. 
+ allow_soft_placement = not force_gpu if config is None: config = config_pb2.ConfigProto() - config.allow_soft_placement = not force_gpu + config.allow_soft_placement = allow_soft_placement config.gpu_options.per_process_gpu_memory_fraction = 0.3 - elif force_gpu and config.allow_soft_placement: - config = config_pb2.ConfigProto().CopyFrom(config) + elif not allow_soft_placement and config.allow_soft_placement: + config_copy = config_pb2.ConfigProto() + config_copy.CopyFrom(config) + config = config_copy config.allow_soft_placement = False # Don't perform optimizations for tests so we don't inadvertently run # gpu ops on cpu -- GitLab From ce9b23070638094022036656e5d1fbf3e23b74c6 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Thu, 13 Sep 2018 11:24:37 +0300 Subject: [PATCH 0122/1357] Add forgotten ignite_byte_swapper.h --- .../ignite/kernels/ignite_byte_swapper.h | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h new file mode 100644 index 0000000000..986bedcf69 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -0,0 +1,129 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ + +#include + +namespace tensorflow { + +class ByteSwapper { + public: + ByteSwapper(bool big_endian) { + int x = 1; + bool is_little_endian = (*(char *)&x == 1); + swap_ = big_endian == is_little_endian; + } + + inline void SwapIfRequiredInt16(int16_t *x) const { + if (swap_) { + Swap16(x); + } + } + + inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const { + if (swap_) { + Swap16(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt32(int32_t *x) const { + if (swap_) { + Swap32(x); + } + } + + inline void SwapIfRequiredFloat(float *x) const { + if (swap_) { + Swap32(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt64(int64_t *x) const { + if (swap_) { + Swap64(x); + } + } + + inline void SwapIfRequiredDouble(double *x) const { + if (swap_) { + Swap64(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt16Arr(int16_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap16(&x[i]); + } + } + + inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x, + int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap16(reinterpret_cast(&x[i])); + } + } + + inline void SwapIfRequiredInt32Arr(int32_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap32(&x[i]); + } + } + + inline void SwapIfRequiredFloatArr(float *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap32(reinterpret_cast(&x[i])); + } + } + + inline void SwapIfRequiredInt64Arr(int64_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap64(&x[i]); + } + } + + inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + 
Swap64(reinterpret_cast(&x[i])); + } + } + + private: + inline void Swap16(int16_t *x) const { + *x = ((*x & 0xFF) << 8) | ((*x >> 8) & 0xFF); + } + + inline void Swap32(int32_t *x) const { + *x = ((*x & 0xFF) << 24) | (((*x >> 8) & 0xFF) << 16) | + (((*x >> 16) & 0xFF) << 8) | ((*x >> 24) & 0xFF); + } + + inline void Swap64(int64_t *x) const { + *x = ((*x & 0xFF) << 56) | (((*x >> 8) & 0xFF) << 48) | + (((*x >> 16) & 0xFF) << 40) | (((*x >> 24) & 0xFF) << 32) | + (((*x >> 32) & 0xFF) << 24) | (((*x >> 40) & 0xFF) << 16) | + (((*x >> 48) & 0xFF) << 8) | ((*x >> 56) & 0xFF); + } + + bool swap_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ -- GitLab From 567de999ae29a2cfb30132f82178006fe5688d6b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 01:35:35 -0700 Subject: [PATCH 0123/1357] Change test to use 2 CPU devices instead of GPU. General cleanup: testDeviceInAndOutOfCond uses a GPU in a CPU only test build resulting in all operations run on the same device even though the graph is for multiple devices. 
PiperOrigin-RevId: 212775360 --- tensorflow/python/kernel_tests/cond_v2_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py index 18a1b230a0..a1efecf28a 100644 --- a/tensorflow/python/kernel_tests/cond_v2_test.py +++ b/tensorflow/python/kernel_tests/cond_v2_test.py @@ -892,11 +892,13 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase): def testDeviceInAndOutOfCond(self): with ops.Graph().as_default() as g: - with self.test_session(graph=g): + with self.test_session( + graph=g, config=config_pb2.ConfigProto(device_count={"CPU": 2})): + def fn2(): - with ops.device("/device:GPU:0"): + with ops.device("/device:CPU:1"): c = constant_op.constant(3.0) - self.assertEqual("/device:GPU:0", c.op.device) + self.assertEqual("/device:CPU:1", c.op.device) return c with ops.device("/device:CPU:0"): -- GitLab From c1de96776067f96da55f8d4709fe5a3c50cccd4b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 01:55:22 -0700 Subject: [PATCH 0124/1357] Use remote builds for the XLA GPU presubmit with gcc/nvcc. 
PiperOrigin-RevId: 212776966 --- third_party/toolchains/BUILD | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD index ec1006fe23..4303751452 100644 --- a/third_party/toolchains/BUILD +++ b/third_party/toolchains/BUILD @@ -20,3 +20,18 @@ platform( value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c" }""", ) + +platform( + name = "rbe_cuda9.0-cudnn7-ubuntu14.04", + constraint_values = [ + "@bazel_tools//platforms:x86_64", + "@bazel_tools//platforms:linux", + "@bazel_tools//tools/cpp:clang", + "@bazel_toolchains//constraints:xenial", + ], + remote_execution_properties = """ + properties: { + name: "container-image" + value:"docker://gcr.io/asci-toolchain/nosla-cuda9.0-cudnn7-ubuntu14.04@sha256:ae58329b961e7c17d89725bf8fd72dfbd5850f4f3313de58e0cafbf5b0343735" + }""", +) -- GitLab From da02a441f4a96ddb47579a52fbbf50d501d72b53 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 02:02:50 -0700 Subject: [PATCH 0125/1357] compat: Update forward compatibility horizon to 2018-09-13 PiperOrigin-RevId: 212777606 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 550017653a..1a1ed04e0d 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 12) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 13) @tf_export("compat.forward_compatible") -- GitLab From d797e99a043e01609583a37c04e1e509d126e1a0 Mon Sep 17 00:00:00 2001 From: dmitrievanthony Date: Thu, 13 Sep 2018 09:42:16 +0000 Subject: [PATCH 0126/1357] Fix windows build. 
--- .../contrib/ignite/kernels/ignite_plain_client_windows.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 9cd08a7779..17f2bf45d1 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -24,6 +24,7 @@ limitations under the License. #pragma comment(lib, "Mswsock.lib") #pragma comment(lib, "AdvApi32.lib") +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" @@ -58,7 +59,7 @@ Status PlainClient::Connect() { &result); if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); - auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); }); + auto clean = gtl::MakeCleanup([result] { freeaddrinfo(result); }); for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); -- GitLab From c8b60b894b91cfdb4176176d7dcf328d2b40b41f Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Thu, 13 Sep 2018 16:34:59 +0300 Subject: [PATCH 0127/1357] Fix code style. 
--- .../ignite/kernels/ignite_byte_swapper.h | 18 +++++++++--------- .../ignite/kernels/ignite_dataset_ops.cc | 2 +- .../kernels/ignite_plain_client_windows.cc | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h index 986bedcf69..5b42de4c5a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -36,7 +36,7 @@ class ByteSwapper { inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const { if (swap_) { - Swap16(reinterpret_cast(x)); + Swap16(reinterpret_cast(x)); } } @@ -48,7 +48,7 @@ class ByteSwapper { inline void SwapIfRequiredFloat(float *x) const { if (swap_) { - Swap32(reinterpret_cast(x)); + Swap32(reinterpret_cast(x)); } } @@ -60,7 +60,7 @@ class ByteSwapper { inline void SwapIfRequiredDouble(double *x) const { if (swap_) { - Swap64(reinterpret_cast(x)); + Swap64(reinterpret_cast(x)); } } @@ -73,8 +73,8 @@ class ByteSwapper { inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x, int32_t length) const { if (swap_) { - for (int32_t i = 0; i < length; i++) - Swap16(reinterpret_cast(&x[i])); + for (int32_t i = 0; i < length; i++) + Swap16(reinterpret_cast(&x[i])); } } @@ -86,8 +86,8 @@ class ByteSwapper { inline void SwapIfRequiredFloatArr(float *x, int32_t length) const { if (swap_) { - for (int32_t i = 0; i < length; i++) - Swap32(reinterpret_cast(&x[i])); + for (int32_t i = 0; i < length; i++) + Swap32(reinterpret_cast(&x[i])); } } @@ -99,8 +99,8 @@ class ByteSwapper { inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const { if (swap_) { - for (int32_t i = 0; i < length; i++) - Swap64(reinterpret_cast(&x[i])); + for (int32_t i = 0; i < length; i++) + Swap64(reinterpret_cast(&x[i])); } } diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index 
eeb29ef30b..e48fce4ed2 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" #include #include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" #include "tensorflow/core/framework/dataset.h" namespace tensorflow { diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 17f2bf45d1..43d6108c34 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -24,8 +24,8 @@ limitations under the License. #pragma comment(lib, "Mswsock.lib") #pragma comment(lib, "AdvApi32.lib") -#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { -- GitLab From 5f28bab20d303e9f815bbe8611c24b7f751e6f9e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 06:44:19 -0700 Subject: [PATCH 0128/1357] Avoid excessive cpu<->gpu memory swaps, compute shape ops on the CPU. This results in +10% perf improvement for tensor2tensor Transformer model training step times, and +37% perf improvement for tensor2tensor Transformer model decoding. 
PiperOrigin-RevId: 212804933 --- tensorflow/python/ops/math_ops.py | 34 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index acd5a32e82..7c59232e40 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2903,22 +2903,24 @@ def tensordot(a, b, axes, name=None): free_dims_static = None shape_a = array_ops.shape(a) rank_a = array_ops.rank(a) - axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") - axes = cast(axes >= 0, dtypes.int32) * axes + cast( - axes < 0, dtypes.int32) * ( - axes + rank_a) - free, _ = array_ops.setdiff1d(range(rank_a), axes) - free_dims = array_ops.gather(shape_a, free) - axes_dims = array_ops.gather(shape_a, axes) - prod_free_dims = reduce_prod(free_dims) - prod_axes_dims = reduce_prod(axes_dims) - perm = array_ops.concat([axes_dims, free_dims], 0) - if flipped: - perm = array_ops.concat([axes, free], 0) - new_shape = array_ops.stack([prod_axes_dims, prod_free_dims]) - else: - perm = array_ops.concat([free, axes], 0) - new_shape = array_ops.stack([prod_free_dims, prod_axes_dims]) + # TODO(b/115583659): Automate this. 
+ with ops.device("/cpu:0"): + axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") + axes = cast(axes >= 0, dtypes.int32) * axes + cast( + axes < 0, dtypes.int32) * ( + axes + rank_a) + free, _ = array_ops.setdiff1d(range(rank_a), axes) + free_dims = array_ops.gather(shape_a, free) + axes_dims = array_ops.gather(shape_a, axes) + prod_free_dims = reduce_prod(free_dims) + prod_axes_dims = reduce_prod(axes_dims) + perm = array_ops.concat([axes_dims, free_dims], 0) + if flipped: + perm = array_ops.concat([axes, free], 0) + new_shape = array_ops.stack([prod_axes_dims, prod_free_dims]) + else: + perm = array_ops.concat([free, axes], 0) + new_shape = array_ops.stack([prod_free_dims, prod_axes_dims]) reshaped_a = array_ops.reshape(array_ops.transpose(a, perm), new_shape) return reshaped_a, free_dims, free_dims_static -- GitLab From 46aa7cf45c62d193f56f55d7d2ffc5baf7af3b65 Mon Sep 17 00:00:00 2001 From: James Keeling Date: Thu, 13 Sep 2018 06:52:12 -0700 Subject: [PATCH 0129/1357] Replace iter->second with partition_graph in DirectSession::Run This loop uses an iterator. It takes references to iter->first and iter->second right at the top of the loop and uses these references throughout, except for this line, which I've fixed. PiperOrigin-RevId: 212805731 --- tensorflow/core/common_runtime/direct_session.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index eb388202fa..b4d8e285bd 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1228,7 +1228,7 @@ Status DirectSession::CreateExecutors( } }; - optimizer.Optimize(lib, options_.env, device, &iter->second, + optimizer.Optimize(lib, options_.env, device, &partition_graph, /*shape_map=*/nullptr); // TensorFlow Debugger (tfdbg) inserts debug nodes in the graph. 
-- GitLab From 226cc7c47e2df8682b384aef5c54836948caecb3 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 13 Sep 2018 07:26:18 -0700 Subject: [PATCH 0130/1357] Allow unsupported comparison operators to be passed through and scale back the coverage of overloads. It's up for discussion whether we allow overloading everything or let the users rely on the existing operator overloading mechanisms instead. The one case that we do want to support is the equality operator. PiperOrigin-RevId: 212809447 --- .../converters/logical_expressions.py | 21 ++++++++++---- .../converters/logical_expressions_test.py | 10 +++---- tensorflow/python/autograph/utils/__init__.py | 2 -- .../autograph/utils/multiple_dispatch.py | 10 ------- .../autograph/utils/multiple_dispatch_test.py | 29 ------------------- 5 files changed, 19 insertions(+), 53 deletions(-) diff --git a/tensorflow/python/autograph/converters/logical_expressions.py b/tensorflow/python/autograph/converters/logical_expressions.py index ac42ee2c33..8c4d53f9a8 100644 --- a/tensorflow/python/autograph/converters/logical_expressions.py +++ b/tensorflow/python/autograph/converters/logical_expressions.py @@ -57,8 +57,6 @@ class LogicalExpressionTransformer(converter.Base): gast.NotEq: 'tf.not_equal', gast.Or: 'tf.logical_or', gast.USub: 'tf.negative', - gast.Is: 'ag__.utils.dynamic_is', - gast.IsNot: 'ag__.utils.dynamic_is_not' } def _expect_simple_symbol(self, operand): @@ -72,12 +70,13 @@ class LogicalExpressionTransformer(converter.Base): '"a.x or b"; for a workaround, assign the expression to a local ' 'variable and use that instead, for example "tmp = a.x", "tmp or b"') + def _has_matching_func(self, operator): + op_type = type(operator) + return op_type in self.op_mapping + def _matching_func(self, operator): op_type = type(operator) - mapped_op = self.op_mapping.get(op_type) - if not mapped_op: - raise NotImplementedError('operator %s is not yet supported' % op_type) - return mapped_op + return self.op_mapping[op_type] 
def _as_function(self, func_name, args): template = """ @@ -90,6 +89,16 @@ class LogicalExpressionTransformer(converter.Base): def visit_Compare(self, node): node = self.generic_visit(node) + + if not all(self._has_matching_func(op) for op in node.ops): + if len(node.ops) == 1: + # Basic expressions are safe to leave as they are. + return node + else: + raise NotImplementedError( + 'compound expression with at least one unsupported ' + 'operator: {}'.format(node.ops)) + ops_and_comps = list(zip(node.ops, node.comparators)) left = node.left op_tree = None diff --git a/tensorflow/python/autograph/converters/logical_expressions_test.py b/tensorflow/python/autograph/converters/logical_expressions_test.py index 5fb3fb992f..b78b4d3a6a 100644 --- a/tensorflow/python/autograph/converters/logical_expressions_test.py +++ b/tensorflow/python/autograph/converters/logical_expressions_test.py @@ -47,14 +47,12 @@ class GradientsFunctionTest(converter_testing.TestCase): with self.cached_session() as sess: self.assertTrue(sess.run(result.test_fn(True, False, True))) - def test_ag_utils_lookup(self): + def test_unsupported_ops(self): def test_fn(a, b): - return a is b or a is not b + return a in b - with self.converted(test_fn, logical_expressions, {}, math_ops.logical_or - ) as result: - with self.cached_session() as sess: - self.assertTrue(sess.run(result.test_fn(True, False))) + with self.converted(test_fn, logical_expressions, {}) as result: + self.assertTrue(result.test_fn('a', ('a',))) if __name__ == '__main__': diff --git a/tensorflow/python/autograph/utils/__init__.py b/tensorflow/python/autograph/utils/__init__.py index e38c82a079..c781958481 100644 --- a/tensorflow/python/autograph/utils/__init__.py +++ b/tensorflow/python/autograph/utils/__init__.py @@ -20,8 +20,6 @@ from __future__ import print_function from tensorflow.python.autograph.utils.context_managers import control_dependency_on_returns from tensorflow.python.autograph.utils.misc import alias_tensors -from 
tensorflow.python.autograph.utils.multiple_dispatch import dynamic_is -from tensorflow.python.autograph.utils.multiple_dispatch import dynamic_is_not from tensorflow.python.autograph.utils.multiple_dispatch import run_cond from tensorflow.python.autograph.utils.py_func import wrap_py_func from tensorflow.python.autograph.utils.tensor_list import dynamic_list_append diff --git a/tensorflow/python/autograph/utils/multiple_dispatch.py b/tensorflow/python/autograph/utils/multiple_dispatch.py index 33f521db2c..107c8f7a68 100644 --- a/tensorflow/python/autograph/utils/multiple_dispatch.py +++ b/tensorflow/python/autograph/utils/multiple_dispatch.py @@ -22,16 +22,6 @@ from tensorflow.python.autograph.utils.type_check import is_tensor from tensorflow.python.ops import control_flow_ops -def dynamic_is(left, right): - # TODO(alexbw) if we're sure we should leave 'is' in place, - # then change the semantics in converters/logical_expressions.py - return left is right - - -def dynamic_is_not(left, right): - return left is not right - - def run_cond(condition, true_fn, false_fn): """Type-dependent functional conditional. 
diff --git a/tensorflow/python/autograph/utils/multiple_dispatch_test.py b/tensorflow/python/autograph/utils/multiple_dispatch_test.py index ed20822529..2a77c895ce 100644 --- a/tensorflow/python/autograph/utils/multiple_dispatch_test.py +++ b/tensorflow/python/autograph/utils/multiple_dispatch_test.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - from tensorflow.python.autograph.utils import multiple_dispatch from tensorflow.python.client.session import Session from tensorflow.python.framework.constant_op import constant @@ -28,33 +26,6 @@ from tensorflow.python.platform import test class MultipleDispatchTest(test.TestCase): - def test_dynamic_is_python(self): - a = np.eye(3) - also_a = a - not_actually_a = np.eye(3) - should_be_true1 = multiple_dispatch.dynamic_is(a, also_a) - should_be_false1 = multiple_dispatch.dynamic_is_not(a, also_a) - should_be_true2 = multiple_dispatch.dynamic_is_not(a, not_actually_a) - should_be_false2 = multiple_dispatch.dynamic_is(a, not_actually_a) - self.assertTrue(should_be_true1) - self.assertTrue(should_be_true2) - self.assertFalse(should_be_false1) - self.assertFalse(should_be_false2) - - def test_dynamic_is_tf(self): - with Session().as_default(): - a = constant([2.0]) - also_a = a - not_actually_a = constant([2.0]) - should_be_true1 = multiple_dispatch.dynamic_is(a, also_a) - should_be_false1 = multiple_dispatch.dynamic_is_not(a, also_a) - should_be_true2 = multiple_dispatch.dynamic_is_not(a, not_actually_a) - should_be_false2 = multiple_dispatch.dynamic_is(a, not_actually_a) - self.assertTrue(should_be_true1) - self.assertTrue(should_be_true2) - self.assertFalse(should_be_false1) - self.assertFalse(should_be_false2) - def test_run_cond_python(self): true_fn = lambda: (2,) false_fn = lambda: (3,) -- GitLab From cd06ad2516cba760d875f77f43f20021e3560036 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 13 Sep 2018 14:33:04 
+0000 Subject: [PATCH 0131/1357] Update code owner for S3 file system Add myself so that I could be assigned for issues and PRs in S3 file systems. Signed-off-by: Yong Tang --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index 78f80c8d71..0d208eca77 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -2,6 +2,7 @@ /tenosrflow/core/debug @caisq /tensorflow/core/platform/windows/ @mrry +/tensorflow/core/platform/s3 @yongtang /tensorflow/go @asimshankar /tensorflow/java/ @asimshankar /tensorflow/python/debug @caisq -- GitLab From f57ea2399e96131d26dedadd901fa852685e23a1 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 13 Sep 2018 14:35:21 +0000 Subject: [PATCH 0132/1357] Update code owner for contrib/{kafka,kinesis} Add myself so that issues or PRs could be assigned to me. Note contrib/{kafka,kinesis} might be moved: https://github.com/tensorflow/community/pull/18 Signed-off-by: Yong Tang --- CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 0d208eca77..b612bccffb 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -31,9 +31,12 @@ /tensorflow/contrib/gan/ @joel-shor /tensorflow/contrib/graph_editor/ @purpledog # NEED OWNER: /tensorflow/contrib/grid_rnn/ +/tensorflow/contrib/hadoop @yongtang /tensorflow/contrib/hvx/ @satok16 /tensorflow/contrib/integrate/ @shoyer +/tensorflow/contrib/kafka @yongtang /tensorflow/contrib/kernel_methods/ @petrosmol +/tensorflow/contrib/kinesis @yongtang /tensorflow/contrib/ios_examples/ @petewarden /tensorflow/contrib/labeled_tensor/ @shoyer /tensorflow/contrib/layers/ @fchollet @martinwicke -- GitLab From c513c04aed8790c78c46b78f90ec848555498ce4 Mon Sep 17 00:00:00 2001 From: dmitrievanthony Date: Thu, 13 Sep 2018 15:13:54 +0000 Subject: [PATCH 0133/1357] Add -DWIN32_LEAN_AND_MEAN option into BUILD. 
--- tensorflow/contrib/ignite/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index 2f598b4aed..1adc6c6ccc 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -61,6 +61,9 @@ cc_library( "@boringssl//:ssl", "@protobuf_archive//:protobuf_headers", ], + copts = if_windows([ + "-DWIN32_LEAN_AND_MEAN", + ]), alwayslink = 1, ) -- GitLab From 7453b0b1cee3d251106684876bc9d639235f5c4a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 13 Sep 2018 08:09:39 -0700 Subject: [PATCH 0134/1357] Updates TensorFlow landing pages to make description and code block widths consistent at all breakpoints. PiperOrigin-RevId: 212814483 --- tensorflow/contrib/lite/g3doc/_index.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml index 9119e49117..b3f21e21ac 100644 --- a/tensorflow/contrib/lite/g3doc/_index.yaml +++ b/tensorflow/contrib/lite/g3doc/_index.yaml @@ -5,7 +5,8 @@ landing_page: rows: - heading: TensorFlow Lite is a lightweight solution for mobile and embedded devices. items: - - description: > + - classname: devsite-landing-row-50 + description: > TensorFlow Lite is TensorFlow’s lightweight solution for mobile and embedded devices. It enables on-device machine learning inference with low latency and a small binary size. TensorFlow Lite also supports @@ -33,7 +34,7 @@ landing_page: icon_name: chevron_right foreground: theme background: grey - - code_block: | + code_block: |
         $ toco --input_file=$(pwd)/mobilenet_v1_1.0_224/frozen_graph.pb \
                --input_format=TENSORFLOW_GRAPHDEF \
-- 
GitLab


From 8a6c83656a2197309dacba124944c665530dd218 Mon Sep 17 00:00:00 2001
From: "William D. Irons" 
Date: Thu, 13 Sep 2018 11:19:41 -0400
Subject: [PATCH 0135/1357] Move ppc64le build/test to cuda 9.2

CUDA 9.2 is the first version of cuda that nvidia will support on
Power9 hardware.
The dockerfile is used in the jenkins build of the ppc64le whl file
and in the CI/CD test.
---
 tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
index e026edb6bb..0a55b84ac4 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le
@@ -1,4 +1,4 @@
-FROM nvidia/cuda-ppc64le:9.0-cudnn7-devel-ubuntu16.04
+FROM nvidia/cuda-ppc64le:9.2-cudnn7-devel-ubuntu16.04
 
 LABEL maintainer="William Irons "
 
@@ -26,6 +26,8 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 # Configure the build for our CUDA configuration.
 ENV TF_NEED_CUDA 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES 3.0
+ENV TF_CUDA_VERSION 9.2
+ENV CUDA_TOOLKIT_PATH /usr/local/cuda-9.2
 
 # TODO get NCCL 2 in the docker image
 ENV TF_NCCL_VERSION 1
-- 
GitLab


From a4bf3d0935570762e9d60eb917d8f42be7e398b4 Mon Sep 17 00:00:00 2001
From: Mark Heffernan 
Date: Thu, 13 Sep 2018 09:01:27 -0700
Subject: [PATCH 0136/1357] Add HloModuleGroup abstraction. This CL adds
 HloModuleGroup which is a simple container of HLO modules. The module group
 gathers together HLO modules which are built to run concurrently across
 multiple devices. This CL just adds the container class. Later CLs will tie
 this into other parts of XLA including adding HloModuleGroup HLO passes which
 operate on an entire module group.

PiperOrigin-RevId: 212821390
---
 tensorflow/compiler/xla/service/BUILD         |  31 ++++
 tensorflow/compiler/xla/service/hlo.proto     |   7 +
 .../compiler/xla/service/hlo_module_group.cc  |  91 +++++++++++
 .../compiler/xla/service/hlo_module_group.h   |  81 ++++++++++
 .../xla/service/hlo_module_group_test.cc      | 142 ++++++++++++++++++
 .../compiler/xla/service/hlo_module_test.cc   |   1 -
 6 files changed, 352 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_group.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_group.h
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_group_test.cc

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 17a557ccc3..fb80c78f68 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1146,6 +1146,37 @@ tf_cc_test(
     ],
 )
 
+cc_library(
+    name = "hlo_module_group",
+    srcs = ["hlo_module_group.cc"],
+    hdrs = ["hlo_module_group.h"],
+    deps = [
+        ":hlo",
+        ":hlo_proto",
+        "//tensorflow/compiler/xla:statusor",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
+tf_cc_test(
+    name = "hlo_module_group_test",
+    srcs = ["hlo_module_group_test.cc"],
+    deps = [
+        ":hlo",
+        ":hlo_matchers",
+        ":hlo_module_group",
+        ":hlo_parser",
+        ":hlo_proto",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "hlo_module_group_metadata",
     srcs = ["hlo_module_group_metadata.cc"],
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 93ec2c9438..b19ec12638 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -309,6 +309,13 @@ message HeapSimulatorTrace {
   bool whole_module_simulation = 2;
 }
 
+// An abstraction representing a set of HLO modules built to run concurrently
+// across different devices.
+message HloModuleGroupProto {
+  string name = 1;
+  repeated HloModuleProto hlo_modules = 2;
+}
+
 // Serialization of BufferAssignment.
 message BufferAssignmentProto {
   // Alias represents a source LogicalBuffer, and the buffer location that
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.cc b/tensorflow/compiler/xla/service/hlo_module_group.cc
new file mode 100644
index 0000000000..f9b56ef464
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_group.cc
@@ -0,0 +1,91 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
+
+namespace xla {
+
+HloModuleGroup::HloModuleGroup(absl::string_view name,
+                               std::unique_ptr module)
+    : name_(name) {
+  push_back(std::move(module));
+}
+
+HloModuleGroup::HloModuleGroup(absl::string_view name,
+                               absl::Span> modules)
+    : name_(name) {
+  for (auto& module : modules) {
+    push_back(std::move(module));
+  }
+}
+
+std::vector> HloModuleGroup::ConsumeModules() {
+  std::vector> ret_modules = std::move(modules_);
+
+  // Clear everything so the object is in a known (empty) state.
+  modules_.clear();
+  module_ptrs_.clear();
+  return ret_modules;
+}
+
+string HloModuleGroup::ToString() const {
+  std::ostringstream s;
+  s << "HloModuleGroup " << name() << "\n\n";
+  for (const HloModule* module : modules()) {
+    s << module->ToString() << "\n";
+  }
+  return s.str();
+}
+
+HloModuleGroupProto HloModuleGroup::ToProto() const {
+  HloModuleGroupProto proto;
+  proto.set_name(name());
+  for (const HloModule* module : modules()) {
+    *proto.add_hlo_modules() = module->ToProto();
+  }
+  return proto;
+}
+
+/* static */ StatusOr HloModuleGroup::CreateFromProto(
+    const HloModuleGroupProto& proto,
+    absl::Span module_configs) {
+  TF_RET_CHECK(!proto.name().empty()) << "Module group name cannot be empty";
+  TF_RET_CHECK(proto.hlo_modules_size() > 0)
+      << "Module group must have at least one HLO module";
+  TF_RET_CHECK(proto.hlo_modules_size() == module_configs.size());
+
+  std::vector> modules;
+  for (int i = 0; i < proto.hlo_modules_size(); ++i) {
+    const HloModuleProto& module_proto = proto.hlo_modules(i);
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr module,
+        HloModule::CreateFromProto(module_proto, module_configs[i]));
+    modules.push_back(std::move(module));
+  }
+
+  return HloModuleGroup(proto.name(), absl::MakeSpan(modules));
+}
+
+void HloModuleGroup::push_back(std::unique_ptr module) {
+  modules_.push_back(std::move(module));
+  module_ptrs_.push_back(modules_.back().get());
+}
+
+std::ostream& operator<<(std::ostream& out, const HloModuleGroup& group) {
+  out << group.ToString();
+  return out;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module_group.h b/tensorflow/compiler/xla/service/hlo_module_group.h
new file mode 100644
index 0000000000..7338be8b9c
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_group.h
@@ -0,0 +1,81 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_H_
+
+#include 
+#include 
+#include 
+
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+
+// An abstraction representing an ordered set of HLO modules built to run
+// concurrently across different devices.
+class HloModuleGroup {
+ public:
+  // Construct an empty module group.
+  explicit HloModuleGroup(absl::string_view name) : name_(name) {}
+
+  // Construct a module group containing a single module.
+  HloModuleGroup(absl::string_view name, std::unique_ptr module);
+
+  // Construct a module group containing any number of modules.
+  HloModuleGroup(absl::string_view name,
+                 absl::Span> modules);
+
+  // Returns the modules contained in the group.
+  const std::vector& modules() const { return module_ptrs_; }
+
+  // Returns a module at a particular index.
+  HloModule& module(int index) const { return *module_ptrs_.at(index); }
+
+  // Add a module to the back of the vector of modules in the group.
+  void push_back(std::unique_ptr module);
+
+  // Moves all modules from the group into the returned vector. After this
+  // method runs, the module group will be empty.
+  std::vector> ConsumeModules();
+
+  string name() const { return name_; }
+  string ToString() const;
+
+  // Serialize the module group to/from a proto.
+  HloModuleGroupProto ToProto() const;
+  static StatusOr CreateFromProto(
+      const HloModuleGroupProto& proto,
+      absl::Span module_configs);
+
+ private:
+  string name_;
+
+  // Vector of modules as std::unique_ptrs.
+  std::vector> modules_;
+
+  // Vector of modules as normal pointers. This vector is kept in sync with
+  // modules_ as modules are added to the group with push_back.
+  std::vector module_ptrs_;
+};
+
+std::ostream& operator<<(std::ostream& out, const HloModuleGroup& group);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_H_
diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
new file mode 100644
index 0000000000..ebf790ba6f
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc
@@ -0,0 +1,142 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_module_group.h"
+
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+
+namespace {
+
+namespace op = ::xla::testing::opcode_matchers;
+
+class HloModuleGroupTest : public HloTestBase {
+ protected:
+  HloModuleGroupTest() = default;
+};
+
+TEST_F(HloModuleGroupTest, SingleModule) {
+  const string text = R"(
+HloModule simple_module
+
+ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module,
+                          ParseHloString(text));
+  HloModuleGroup group(TestName(), std::move(module));
+
+  EXPECT_EQ(group.modules().size(), 1);
+  EXPECT_THAT(
+      group.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+
+  TF_ASSERT_OK_AND_ASSIGN(HloModuleGroup group_copy,
+                          HloModuleGroup::CreateFromProto(
+                              group.ToProto(), {group.module(0).config()}));
+  EXPECT_EQ(group_copy.modules().size(), 1);
+  EXPECT_THAT(
+      group_copy.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+
+  std::vector> modules = group.ConsumeModules();
+  EXPECT_EQ(modules.size(), 1);
+  EXPECT_EQ(group.modules().size(), 0);
+}
+
+TEST_F(HloModuleGroupTest, MultipleModules) {
+  const string text_0 = R"(
+HloModule module0
+
+ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+)";
+  const string text_1 = R"(
+HloModule module1
+
+ENTRY %entry (a: f32[]) -> f32[] {
+  ROOT %a = f32[] parameter(0)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module_0,
+                          ParseHloString(text_0));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module_1,
+                          ParseHloString(text_1));
+  std::vector> modules;
+  modules.push_back(std::move(module_0));
+  modules.push_back(std::move(module_1));
+  HloModuleGroup group(TestName(), absl::MakeSpan(modules));
+  EXPECT_EQ(group.modules().size(), 2);
+  EXPECT_THAT(
+      group.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+  EXPECT_THAT(group.module(1).entry_computation()->instructions(),
+              ::testing::ElementsAre(op::Parameter()));
+
+  TF_ASSERT_OK_AND_ASSIGN(HloModuleGroup group_copy,
+                          HloModuleGroup::CreateFromProto(
+                              group.ToProto(), {group.module(0).config(),
+                                                group.module(1).config()}));
+  EXPECT_EQ(group_copy.modules().size(), 2);
+}
+
+TEST_F(HloModuleGroupTest, BuildModuleGroupByPushBack) {
+  const string text_0 = R"(
+HloModule module0
+
+ENTRY %entry (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+)";
+  const string text_1 = R"(
+HloModule module1
+
+ENTRY %entry (a: f32[]) -> f32[] {
+  ROOT %a = f32[] parameter(0)
+}
+)";
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module_0,
+                          ParseHloString(text_0));
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module_1,
+                          ParseHloString(text_1));
+  HloModuleGroup group(TestName());
+  group.push_back(std::move(module_0));
+  group.push_back(std::move(module_1));
+
+  EXPECT_EQ(group.modules().size(), 2);
+  EXPECT_THAT(
+      group.module(0).entry_computation()->instructions(),
+      ::testing::ElementsAre(op::Parameter(), op::Parameter(), op::Add()));
+  EXPECT_THAT(group.module(1).entry_computation()->instructions(),
+              ::testing::ElementsAre(op::Parameter()));
+}
+
+}  // namespace
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module_test.cc b/tensorflow/compiler/xla/service/hlo_module_test.cc
index 6243943420..39f38b417a 100644
--- a/tensorflow/compiler/xla/service/hlo_module_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_test.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
-
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/test.h"
 
-- 
GitLab


From 88a7c5b98fc1ccb56134003ba3dc88a09385c0a7 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer 
Date: Thu, 13 Sep 2018 09:33:24 -0700
Subject: [PATCH 0137/1357] [TF:XLA] Make DataTypeToPrimitiveType work with all
 quantized types supported by TF

PiperOrigin-RevId: 212826065
---
 .../compiler/tf2xla/literal_util_test.cc      | 85 +++++++++++--------
 tensorflow/compiler/tf2xla/type_util.cc       | 11 ++-
 2 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/literal_util_test.cc b/tensorflow/compiler/tf2xla/literal_util_test.cc
index ed452bceeb..15f4c38da2 100644
--- a/tensorflow/compiler/tf2xla/literal_util_test.cc
+++ b/tensorflow/compiler/tf2xla/literal_util_test.cc
@@ -22,48 +22,61 @@ limitations under the License.
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
+namespace {
 
 TEST(LiteralUtil, LiteralToHostTensor) {
   // int64 literal can only be converted to an int64 host tensor.
-  {
-    std::vector int64_values = {1, 2, 3};
-    xla::Literal int64_values_literal =
-        xla::LiteralUtil::CreateR1(absl::Span(int64_values));
-    Tensor host_tensor;
-    EXPECT_EQ("Cannot convert literal of type S64 to tensor of type int32",
-              LiteralToHostTensor(int64_values_literal, DT_INT32, &host_tensor)
-                  .error_message());
-    EXPECT_EQ("Cannot convert literal of type S64 to tensor of type qint32",
-              LiteralToHostTensor(int64_values_literal, DT_QINT32, &host_tensor)
-                  .error_message());
-    EXPECT_TRUE(
-        LiteralToHostTensor(int64_values_literal, DT_INT64, &host_tensor).ok());
-    test::ExpectTensorEqual(host_tensor,
-                                   test::AsTensor(int64_values));
-  }
+  std::vector int64_values = {1, 2, 3};
+  xla::Literal int64_values_literal =
+      xla::LiteralUtil::CreateR1(absl::Span(int64_values));
+  Tensor host_tensor;
+  EXPECT_EQ("Cannot convert literal of type S64 to tensor of type int32",
+            LiteralToHostTensor(int64_values_literal, DT_INT32, &host_tensor)
+                .error_message());
+  EXPECT_EQ("Cannot convert literal of type S64 to tensor of type qint32",
+            LiteralToHostTensor(int64_values_literal, DT_QINT32, &host_tensor)
+                .error_message());
+  EXPECT_TRUE(
+      LiteralToHostTensor(int64_values_literal, DT_INT64, &host_tensor).ok());
+  test::ExpectTensorEqual(host_tensor,
+                                 test::AsTensor(int64_values));
+}
+
+template 
+using LiteralUtilTest = ::testing::Test;
+using Types =
+    ::testing::Types, std::pair,
+                     std::pair, std::pair,
+                     std::pair>;
+
+TYPED_TEST_CASE(LiteralUtilTest, Types);
+
+TYPED_TEST(LiteralUtilTest, LiteralToQuantizedHostTensor) {
+  using int_type = typename TypeParam::first_type;
+  using qint_type = typename TypeParam::second_type;
 
-  {
-    // Repeat tests with int32.
-    Tensor host_tensor;
-    std::vector int32_values = {10, 11};
-    xla::Literal int32_values_literal =
-        xla::LiteralUtil::CreateR1(absl::Span(int32_values));
-    EXPECT_TRUE(
-        LiteralToHostTensor(int32_values_literal, DT_INT32, &host_tensor).ok());
-    test::ExpectTensorEqual(host_tensor,
-                                   test::AsTensor(int32_values));
+  Tensor host_tensor;
+  std::vector int_values = {10, 11};
+  xla::Literal int_values_literal =
+      xla::LiteralUtil::CreateR1(absl::Span(int_values));
+  EXPECT_TRUE(LiteralToHostTensor(int_values_literal,
+                                  DataTypeToEnum::value, &host_tensor)
+                  .ok());
+  test::ExpectTensorEqual(host_tensor,
+                                    test::AsTensor(int_values));
 
-    EXPECT_TRUE(
-        LiteralToHostTensor(int32_values_literal, DT_QINT32, &host_tensor)
-            .ok());
-    std::vector qint32_values = {10, 11};
-    test::ExpectTensorEqual(host_tensor,
-                                    test::AsTensor(qint32_values));
+  EXPECT_TRUE(LiteralToHostTensor(int_values_literal,
+                                  DataTypeToEnum::value,
+                                  &host_tensor)
+                  .ok());
+  std::vector qint_values = {10, 11};
+  test::ExpectTensorEqual(host_tensor,
+                                     test::AsTensor(qint_values));
 
-    EXPECT_EQ("Cannot convert literal of type S32 to tensor of type int64",
-              LiteralToHostTensor(int32_values_literal, DT_INT64, &host_tensor)
-                  .error_message());
-  }
+  EXPECT_EQ(
+      error::INVALID_ARGUMENT,
+      LiteralToHostTensor(int_values_literal, DT_INT64, &host_tensor).code());
 }
 
+}  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc
index c969212a1b..d00b137662 100644
--- a/tensorflow/compiler/tf2xla/type_util.cc
+++ b/tensorflow/compiler/tf2xla/type_util.cc
@@ -26,21 +26,26 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) {
       *type = xla::PRED;
       return Status::OK();
     case tensorflow::DT_INT8:
+    case tensorflow::DT_QINT8:
       *type = xla::S8;
       return Status::OK();
     case tensorflow::DT_INT16:
+    case tensorflow::DT_QINT16:
       *type = xla::S16;
       return Status::OK();
     case tensorflow::DT_INT32:
+    case tensorflow::DT_QINT32:
       *type = xla::S32;
       return Status::OK();
     case tensorflow::DT_INT64:
       *type = xla::S64;
       return Status::OK();
     case tensorflow::DT_UINT8:
+    case tensorflow::DT_QUINT8:
       *type = xla::U8;
       return Status::OK();
     case tensorflow::DT_UINT16:
+    case tensorflow::DT_QUINT16:
       *type = xla::U16;
       return Status::OK();
     case tensorflow::DT_UINT32:
@@ -64,12 +69,6 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) {
     case tensorflow::DT_COMPLEX64:
       *type = xla::C64;
       return Status::OK();
-    case tensorflow::DT_QUINT8:
-      *type = xla::U8;
-      return Status::OK();
-    case tensorflow::DT_QINT32:
-      *type = xla::S32;
-      return Status::OK();
     default:
       return errors::InvalidArgument(
           "Unsupported type in DataTypeToPrimitiveType ",
-- 
GitLab


From 5ae1c93473ae690d4a7b9389b1219179cb2504a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 09:35:01 -0700
Subject: [PATCH 0138/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 212826308
---
 .../internal/optimized/optimized_ops.h        | 688 ++++++++++++------
 .../contrib/lite/kernels/internal/types.h     |  42 +-
 2 files changed, 473 insertions(+), 257 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 2c8e8f90e3..baed8f4993 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -260,16 +260,16 @@ inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) {
   return true;
 }
 
-inline void AddBiasAndEvalActivationFunction(const float* bias_data,
-                                             const Dims<4>& bias_dims,
-                                             float* array_data,
-                                             const Dims<4>& array_dims,
-                                             float output_activation_min,
-                                             float output_activation_max) {
+inline void AddBiasAndEvalActivationFunction(float output_activation_min,
+                                             float output_activation_max,
+                                             const RuntimeShape& bias_shape,
+                                             const float* bias_data,
+                                             const RuntimeShape& array_shape,
+                                             float* array_data) {
 #ifdef USE_NEON
   gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
-  const int bias_size = FlatSize(bias_dims);
-  const int array_size = FlatSize(array_dims);
+  const int bias_size = bias_shape.FlatSize();
+  const int array_size = array_shape.FlatSize();
   TFLITE_DCHECK_EQ((array_size % bias_size), 0);
   float* array_ptr = array_data;
   float* array_end_ptr = array_ptr + array_size;
@@ -319,8 +319,8 @@ inline void AddBiasAndEvalActivationFunction(const float* bias_data,
   }
 #else  // not NEON
   gemmlowp::ScopedProfilingLabel label("AddBiasAndEvalActivationFunction");
-  const int bias_size = FlatSize(bias_dims);
-  const int array_size = FlatSize(array_dims);
+  const int bias_size = bias_shape.FlatSize();
+  const int array_size = array_shape.FlatSize();
   TFLITE_DCHECK_EQ((array_size % bias_size), 0);
   for (int array_offset = 0; array_offset < array_size;
        array_offset += bias_size) {
@@ -333,6 +333,19 @@ inline void AddBiasAndEvalActivationFunction(const float* bias_data,
 #endif
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> overload; forwards to the RuntimeShape overload.
+inline void AddBiasAndEvalActivationFunction(const float* bias_data,
+                                             const Dims<4>& bias_dims,
+                                             float* array_data,
+                                             const Dims<4>& array_dims,
+                                             float output_activation_min,
+                                             float output_activation_max) {
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   DimsToShape(bias_dims), bias_data,
+                                   DimsToShape(array_dims), array_data);
+}
+
 // Note: This to be converted to RuntimeShapes along with Conv.
 // legacy, for compatibility with old checked-in code
 template 
@@ -1672,12 +1685,16 @@ inline void ShuffledFullyConnected(
 }
 
 template 
-inline void ExtractPatchIntoBufferColumn(
-    const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
-    int stride_width, int stride_height, int pad_width, int pad_height,
-    int in_width, int in_height, int in_depth, int single_buffer_length,
-    int buffer_id, const T* in_data, T* conv_buffer_data, uint8 byte_zero) {
+inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
+                                         int h, int b, int kheight, int kwidth,
+                                         int stride_width, int stride_height,
+                                         int pad_width, int pad_height,
+                                         int in_width, int in_height,
+                                         int in_depth, int single_buffer_length,
+                                         int buffer_id, const T* in_data,
+                                         T* conv_buffer_data, uint8 zero_byte) {
   gemmlowp::ScopedProfilingLabel label("ExtractPatchIntoBufferColumn");
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
   // This chunk of code reshapes all the inputs corresponding to
   // output (b, h, w) to a column vector in conv_buffer(:, buffer_id).
   const int kwidth_times_indepth = kwidth * in_depth;
@@ -1699,7 +1716,7 @@ inline void ExtractPatchIntoBufferColumn(
   const int output_row_offset = (buffer_id * single_buffer_length);
   int out_offset =
       output_row_offset + (h_offset * kwidth + w_offset) * in_depth;
-  int in_offset = Offset(input_dims, 0, iw_start, ih_start, b);
+  int in_offset = Offset(input_shape, b, ih_start, iw_start, 0);
 
   // Express all of the calculations as padding around the input patch.
   const int top_padding = h_offset;
@@ -1713,7 +1730,7 @@ inline void ExtractPatchIntoBufferColumn(
   // patch that are off the edge of the input image.
   if (top_padding > 0) {
     const int top_row_elements = (top_padding * kwidth * in_depth);
-    memset(conv_buffer_data + output_row_offset, byte_zero,
+    memset(conv_buffer_data + output_row_offset, zero_byte,
            (top_row_elements * sizeof(T)));
   }
 
@@ -1730,14 +1747,14 @@ inline void ExtractPatchIntoBufferColumn(
     for (int ih = ih_start; ih < ih_end; ++ih) {
       if (left_padding > 0) {
         const int left_start = (out_offset - (left_padding * in_depth));
-        memset(conv_buffer_data + left_start, byte_zero,
+        memset(conv_buffer_data + left_start, zero_byte,
                (left_padding * in_depth * sizeof(T)));
       }
       memcpy(conv_buffer_data + out_offset, in_data + in_offset,
              single_row_num * sizeof(T));
       if (right_padding > 0) {
         const int right_start = (out_offset + single_row_num);
-        memset(conv_buffer_data + right_start, byte_zero,
+        memset(conv_buffer_data + right_start, zero_byte,
                (right_padding * in_depth * sizeof(T)));
       }
       out_offset += kwidth_times_indepth;
@@ -1752,61 +1769,64 @@ inline void ExtractPatchIntoBufferColumn(
     const int bottom_start =
         output_row_offset +
         ((top_padding + (ih_end - ih_start)) * kwidth * in_depth);
-    memset(conv_buffer_data + bottom_start, byte_zero,
+    memset(conv_buffer_data + bottom_start, zero_byte,
            (bottom_row_elements * sizeof(T)));
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> overload; forwards to the RuntimeShape overload.
 template 
-void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
-                   const Dims<4>& filter_dims, int stride_width,
-                   int stride_height, int dilation_width_factor,
-                   int dilation_height_factor, int pad_width, int pad_height,
-                   const Dims<4>& output_dims, uint8 byte_zero,
-                   T* im2col_data) {
+inline void ExtractPatchIntoBufferColumn(
+    const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth,
+    int stride_width, int stride_height, int pad_width, int pad_height,
+    int in_width, int in_height, int in_depth, int single_buffer_length,
+    int buffer_id, const T* in_data, T* conv_buffer_data, uint8 zero_byte) {
+  ExtractPatchIntoBufferColumn(
+      DimsToShape(input_dims), w, h, b, kheight, kwidth, stride_width,
+      stride_height, pad_width, pad_height, in_width, in_height, in_depth,
+      single_buffer_length, buffer_id, in_data, conv_buffer_data, zero_byte);
+}
+
+template 
+void DilatedIm2col(const ConvParams& params, uint8 zero_byte,
+                   const RuntimeShape& input_shape, const T* input_data,
+                   const RuntimeShape& filter_shape,
+                   const RuntimeShape& output_shape, T* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
   // For dilated convolution, the input pixels are not contiguous therefore we
   // can't use the same opitimizations as Im2Col(). Though note this code would
   // work fine for the non-dilated case too (though likely a bit slower).
   gemmlowp::ScopedProfilingLabel label("DilatedIm2col");
   TFLITE_DCHECK(dilation_width_factor != 1 || dilation_height_factor != 1);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
   TFLITE_DCHECK(im2col_data);
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  MatchingArraySize(output_dims, 0, filter_dims, 3);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  MatchingDim(output_shape, 3, filter_shape, 0);
 
   // Construct the MxN sized im2col matrix.
   // The rows M, are sub-ordered B x H x W
-  Dims<4> row_dims;
-  row_dims.sizes[0] = output_width;
-  row_dims.sizes[1] = output_height;
-  row_dims.sizes[2] = batches;
-  row_dims.sizes[3] = 1;
-  ComputeStrides(&row_dims);
-
+  const RuntimeShape row_shape({1, batches, output_height, output_width});
   // The columns, N, are sub-ordered Kh x Kw x Din
-  Dims<4> col_dims;
-  col_dims.sizes[0] = input_depth;
-  col_dims.sizes[1] = filter_width;
-  col_dims.sizes[2] = filter_height;
-  col_dims.sizes[3] = 1;
-  ComputeStrides(&col_dims);
-
+  const RuntimeShape col_shape({1, filter_height, filter_width, input_depth});
   // Use dimensions M and N to construct dims for indexing directly into im2col
-  Dims<4> im2col_dims;
-  im2col_dims.sizes[0] = FlatSize(col_dims);
-  im2col_dims.sizes[1] = FlatSize(row_dims);
-  im2col_dims.sizes[2] = 1;
-  im2col_dims.sizes[3] = 1;
-  ComputeStrides(&im2col_dims);
+  const RuntimeShape im2col_shape(
+      {1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
 
   // Loop through the output rows (B x H x W)
   for (int batch = 0; batch < batches; ++batch) {
@@ -1814,7 +1834,7 @@ void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
       for (int out_x = 0; out_x < output_width; ++out_x) {
         // Each im2col row is an output pixel. Arrange the input data in this
         // row in an order we can conveniently multiply with the filter data.
-        int row_offset = Offset(row_dims, out_x, out_y, batch, 0);
+        int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
         const int in_x_origin = (out_x * stride_width) - pad_width;
         const int in_y_origin = (out_y * stride_height) - pad_height;
         // Loop through all the pixels of the filter (Kh x Kw)
@@ -1825,25 +1845,25 @@ void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
             // Loop through all the filter pixels in this row.
             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
               const int in_x = in_x_origin + dilation_width_factor * filter_x;
-              int col_offset = Offset(col_dims, 0, filter_x, filter_y, 0);
+              int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
               T* dst = im2col_data +
-                       Offset(im2col_dims, col_offset, row_offset, 0, 0);
+                       Offset(im2col_shape, 0, 0, row_offset, col_offset);
               if ((in_x >= 0) && (in_x < input_width)) {
                 // Filter pixel is within the input, copy the input data.
                 T const* src =
-                    input_data + Offset(input_dims, 0, in_x, in_y, batch);
+                    input_data + Offset(input_shape, batch, in_y, in_x, 0);
                 memcpy(dst, src, input_depth * sizeof(T));
               } else {
                 // Filter pixel is outside the input, zero it out.
-                memset(dst, byte_zero, input_depth * sizeof(T));
+                memset(dst, zero_byte, input_depth * sizeof(T));
               }
             }
           } else {
             // Filter row is outside the input, zero out the entire filter row.
-            int col_offset = Offset(col_dims, 0, 0, filter_y, 0);
-            T* dst =
-                im2col_data + Offset(im2col_dims, col_offset, row_offset, 0, 0);
-            memset(dst, byte_zero, filter_width * input_depth * sizeof(T));
+            int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+            T* dst = im2col_data +
+                     Offset(im2col_shape, 0, 0, row_offset, col_offset);
+            memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
           }
         }
       }
@@ -1851,21 +1871,49 @@ void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> overload; packs the scalar args into ConvParams and forwards.
 template 
-void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
-            int stride_height, int pad_width, int pad_height, int kheight,
-            int kwidth, uint8 byte_zero, T* output_data,
-            const Dims<4>& output_dims) {
+void DilatedIm2col(const T* input_data, const Dims<4>& input_dims,
+                   const Dims<4>& filter_dims, int stride_width,
+                   int stride_height, int dilation_width_factor,
+                   int dilation_height_factor, int pad_width, int pad_height,
+                   const Dims<4>& output_dims, uint8 zero_byte,
+                   T* im2col_data) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+
+  DilatedIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), DimsToShape(output_dims),
+                im2col_data);
+}
+
+template 
+void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte,
+            const RuntimeShape& input_shape, const T* input_data,
+            const RuntimeShape& output_shape, T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Im2col");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = ArraySize(input_dims, 0);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_height = ArraySize(input_dims, 2);
-  const int output_depth = ArraySize(output_dims, 0);
-  const int output_width = ArraySize(output_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int output_depth = output_shape.Dims(3);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
 
   int buffer_id = 0;
   // Loop over the output nodes.
@@ -1873,93 +1921,155 @@ void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
     for (int h = 0; h < output_height; ++h) {
       for (int w = 0; w < output_width; ++w) {
         ExtractPatchIntoBufferColumn(
-            input_dims, w, h, b, kheight, kwidth, stride_width, stride_height,
+            input_shape, w, h, b, kheight, kwidth, stride_width, stride_height,
             pad_width, pad_height, input_width, input_height, input_depth,
-            output_depth, buffer_id, input_data, output_data, byte_zero);
+            output_depth, buffer_id, input_data, output_data, zero_byte);
         ++buffer_id;
       }
     }
   }
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> overload; builds ConvParams (dilation 1) and forwards.
+template <typename T>
+void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width,
+            int stride_height, int pad_width, int pad_height, int kheight,
+            int kwidth, uint8 zero_byte, T* output_data,
+            const Dims<4>& output_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = 1;
+  op_params.dilation_height_factor = 1;
+
+  Im2col(op_params, kheight, kwidth, zero_byte, DimsToShape(input_dims),
+         input_data, DimsToShape(output_dims), output_data);
+}
+
 // legacy, for compatibility with old checked-in code
 template 
 void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
             int pad_width, int pad_height, int kheight, int kwidth,
-            uint8 byte_zero, T* output_data, const Dims<4>& output_dims) {
+            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
   Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
-         kwidth, byte_zero, output_data, output_dims);
+         kwidth, zero_byte, output_data, output_dims);
 }
 
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
   (void)im2col_data;
-  (void)im2col_dims;
+  (void)im2col_shape;
   gemmlowp::ScopedProfilingLabel label("Conv");
 
   // NB: static_cast(0x00000000h) == 0.0f
   const uint8 float_zero_byte = 0x00;
   const float* gemm_input_data = nullptr;
-  const Dims<4>* gemm_input_dims = nullptr;
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
   const bool need_dilated_im2col =
       dilation_width_factor != 1 || dilation_height_factor != 1;
   const bool need_im2col = stride_width != 1 || stride_height != 1 ||
                            filter_width != 1 || filter_height != 1;
   if (need_dilated_im2col) {
-    DilatedIm2col(input_data, input_dims, filter_dims, stride_width,
-                  stride_height, dilation_width_factor, dilation_height_factor,
-                  pad_width, pad_height, output_dims, float_zero_byte,
-                  im2col_data);
+    DilatedIm2col(params, float_zero_byte, input_shape, input_data,
+                  filter_shape, output_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else if (need_im2col) {
     TFLITE_DCHECK(im2col_data);
-    Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
-           pad_height, filter_height, filter_width, float_zero_byte,
-           im2col_data, im2col_dims);
+    Im2col(params, filter_height, filter_width, float_zero_byte, input_shape,
+           input_data, im2col_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else {
     // TODO(aselle): We need to make sure to not send im2col if it is not
     // needed.
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    gemm_input_dims = &input_dims;
+    gemm_input_shape = &input_shape;
   }
 
   const auto im2col_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(gemm_input_data, *gemm_input_dims);
+      MapAsMatrixWithLastDimAsRows(gemm_input_data, *gemm_input_shape);
   const auto filter_matrix_map =
-      MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+      MapAsMatrixWithFirstDimAsCols(filter_data, filter_shape);
   auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
 
   Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
 
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data,
-                                   output_dims, output_activation_min,
-                                   output_activation_max);
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   bias_shape, bias_data, output_shape,
+                                   output_data);
 }
 
-inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
-                       const int8_t* filter_data, const Dims<4>& filter_dims,
-                       const float* bias_data, const Dims<4>& bias_dims,
-                       int stride_width, int stride_height, int pad_width,
-                       int pad_height, float* scaling_factors_ptr,
-                       float output_activation_min, float output_activation_max,
-                       float* output_data, const Dims<4>& output_dims,
-                       int8_t* im2col_data, const Dims<4>& im2col_dims) {
-  const int batch_size = input_dims.sizes[3];
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> overload; packs the scalar args into ConvParams and forwards.
+inline void Conv(const float* input_data, const Dims<4>& input_dims,
+                 const float* filter_data, const Dims<4>& filter_dims,
+                 const float* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 float output_activation_min, float output_activation_max,
+                 float* output_data, const Dims<4>& output_dims,
+                 float* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr,
+                       const RuntimeShape& input_shape,
+                       const int8_t* input_data,
+                       const RuntimeShape& filter_shape,
+                       const int8_t* filter_data,
+                       const RuntimeShape& bias_shape, const float* bias_data,
+                       const RuntimeShape& output_shape, float* output_data,
+                       const RuntimeShape& im2col_shape, int8_t* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4);
+
+  const int batch_size = input_shape.Dims(0);
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
 
   const int8_t* gemm_input_data = nullptr;
   int num_input;
@@ -1970,25 +2080,22 @@ inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
     TFLITE_DCHECK(im2col_data);
     // symmetric quantization assumes zero point of 0.
     const int input_zero_point = 0;
-    Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
-           pad_height, filter_height, filter_width, input_zero_point,
-           im2col_data, im2col_dims);
+
+    Im2col(params, filter_height, filter_width, input_zero_point, input_shape,
+           input_data, im2col_shape, im2col_data);
     gemm_input_data = im2col_data;
-    num_input = im2col_dims.sizes[0] * im2col_dims.sizes[1] *
-                im2col_dims.sizes[2] * im2col_dims.sizes[3];
+    num_input = im2col_shape.FlatSize();
   } else {
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    num_input = input_dims.sizes[0] * input_dims.sizes[1] *
-                input_dims.sizes[2] * input_dims.sizes[3];
+    num_input = input_shape.FlatSize();
   }
 
   // Flatten 4D matrices into 2D matrices for matrix multiplication.
 
   // Flatten so that each filter has its own row.
-  const int filter_rows = filter_dims.sizes[3];
-  const int filter_cols =
-      filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2];
+  const int filter_rows = filter_shape.Dims(0);
+  const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
 
   // In MatrixBatchVectorMultiplyAccumulate, each output value is the
   // dot product of one row of the first matrix with one row of the second
@@ -1998,15 +2105,14 @@ inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
   const int gemm_input_cols = filter_cols;
   const int gemm_input_rows = num_input / gemm_input_cols;
 
-  const int output_cols = output_dims.sizes[0];
-  const int output_rows =
-      output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3];
+  const int output_cols = output_shape.Dims(3);
+  const int output_rows = FlatSizeSkipDim(output_shape, 3);
   TFLITE_DCHECK_EQ(output_cols, filter_rows);
   TFLITE_DCHECK_EQ(output_rows, gemm_input_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_cols);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(3), output_cols);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(2), 1);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(1), 1);
+  TFLITE_DCHECK_EQ(bias_shape.Dims(0), 1);
 
   // MatrixBatchVectorMultiplyAccumulate assumes that each row of the second
   // input matrix has its own scale factor. This code duplicates the scale
@@ -2023,11 +2129,39 @@ inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
       scaling_factors_ptr, /*n_batch=*/gemm_input_rows, output_data,
       /*result_stride=*/1);
 
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data,
-                                   output_dims, output_activation_min,
-                                   output_activation_max);
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   bias_shape, bias_data, output_shape,
+                                   output_data);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> overload; packs the scalar args into ConvParams and forwards.
+inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims,
+                       const int8_t* filter_data, const Dims<4>& filter_dims,
+                       const float* bias_data, const Dims<4>& bias_dims,
+                       int stride_width, int stride_height, int pad_width,
+                       int pad_height, float* scaling_factors_ptr,
+                       float output_activation_min, float output_activation_max,
+                       float* output_data, const Dims<4>& output_dims,
+                       int8_t* im2col_data, const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  HybridConv(op_params, scaling_factors_ptr, DimsToShape(input_dims),
+             input_data, DimsToShape(filter_dims), filter_data,
+             DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+             output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 template 
 void Conv(const float* input_data, const Dims<4>& input_dims,
           const float* filter_data, const Dims<4>& filter_dims,
@@ -2045,6 +2179,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
        im2col_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template 
 void Conv(const float* input_data, const Dims<4>& input_dims,
@@ -2061,6 +2196,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template 
 void Conv(const float* input_data, const Dims<4>& input_dims,
@@ -2074,27 +2210,33 @@ void Conv(const float* input_data, const Dims<4>& input_dims,
            output_dims, im2col_data, im2col_dims);
 }
 
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
-                 int32 input_offset, const uint8* filter_data,
-                 const Dims<4>& filter_dims, int32 filter_offset,
-                 const int32* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int dilation_width_factor,
-                 int dilation_height_factor, int pad_width, int pad_height,
-                 int32 output_offset, int32 output_multiplier, int output_shift,
-                 int32 output_activation_min, int32 output_activation_max,
-                 uint8* output_data, const Dims<4>& output_dims,
-                 uint8* im2col_data, const Dims<4>& im2col_dims,
-                 gemmlowp::GemmContext* gemm_context) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const uint8* input_data, const RuntimeShape& filter_shape,
+                 const uint8* filter_data, const RuntimeShape& bias_shape,
+                 const int32* bias_data, const RuntimeShape& output_shape,
+                 uint8* output_data, const RuntimeShape& im2col_shape,
+                 uint8* im2col_data, gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("Conv/8bit");
-
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4);
 
   const uint8* gemm_input_data = nullptr;
-  const Dims<4>* gemm_input_dims = nullptr;
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
   const bool need_dilated_im2col =
       dilation_width_factor != 1 || dilation_height_factor != 1;
   const bool need_im2col = stride_width != 1 || stride_height != 1 ||
@@ -2104,53 +2246,47 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
     const int input_zero_point = -input_offset;
     TFLITE_DCHECK_GE(input_zero_point, 0);
     TFLITE_DCHECK_LE(input_zero_point, 255);
-    DilatedIm2col(input_data, input_dims, filter_dims, stride_width,
-                  stride_height, dilation_width_factor, dilation_height_factor,
-                  pad_width, pad_height, output_dims, input_zero_point,
-                  im2col_data);
+    DilatedIm2col(params, input_zero_point, input_shape, input_data,
+                  filter_shape, output_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else if (need_im2col) {
     TFLITE_DCHECK(im2col_data);
     const int input_zero_point = -input_offset;
     TFLITE_DCHECK_GE(input_zero_point, 0);
     TFLITE_DCHECK_LE(input_zero_point, 255);
-    Im2col(input_data, input_dims, stride_width, stride_height, pad_width,
-           pad_height, filter_height, filter_width, input_zero_point,
-           im2col_data, im2col_dims);
+    Im2col(params, filter_height, filter_width, input_zero_point, input_shape,
+           input_data, im2col_shape, im2col_data);
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else {
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    gemm_input_dims = &input_dims;
+    gemm_input_shape = &input_shape;
   }
 
-  const int gemm_input_rows = gemm_input_dims->sizes[0];
+  const int gemm_input_rows = gemm_input_shape->Dims(3);
   // Using FlatSizeSkipDim causes segfault in some contexts (see b/79927784).
   // The root cause has not yet been identified though. Same applies below for
   // the other calls commented out. This is a partial rollback of cl/196819423.
-  // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_dims, 0);
-  const int gemm_input_cols = gemm_input_dims->sizes[1] *
-                              gemm_input_dims->sizes[2] *
-                              gemm_input_dims->sizes[3];
-  const int filter_rows = filter_dims.sizes[3];
+  // const int gemm_input_cols = FlatSizeSkipDim(*gemm_input_shape, 3);
+  const int gemm_input_cols = gemm_input_shape->Dims(0) *
+                              gemm_input_shape->Dims(1) *
+                              gemm_input_shape->Dims(2);
+  const int filter_rows = filter_shape.Dims(0);
   // See b/79927784.
-  // const int filter_cols = FlatSizeSkipDim(filter_dims, 3);
+  // const int filter_cols = FlatSizeSkipDim(filter_shape, 0);
   const int filter_cols =
-      filter_dims.sizes[0] * filter_dims.sizes[1] * filter_dims.sizes[2];
-  const int output_rows = output_dims.sizes[0];
+      filter_shape.Dims(1) * filter_shape.Dims(2) * filter_shape.Dims(3);
+  const int output_rows = output_shape.Dims(3);
   // See b/79927784.
-  // const int output_cols = FlatSizeSkipDim(output_dims, 0);
+  // const int output_cols = FlatSizeSkipDim(output_shape, 3);
   const int output_cols =
-      output_dims.sizes[1] * output_dims.sizes[2] * output_dims.sizes[3];
+      output_shape.Dims(0) * output_shape.Dims(1) * output_shape.Dims(2);
   TFLITE_DCHECK_EQ(output_rows, filter_rows);
   TFLITE_DCHECK_EQ(output_cols, gemm_input_cols);
   TFLITE_DCHECK_EQ(filter_cols, gemm_input_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_rows);
   gemmlowp::MatrixMap filter_matrix(
       filter_data, filter_rows, filter_cols);
   gemmlowp::MatrixMap input_matrix(
@@ -2166,6 +2302,43 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
       input_offset, output_pipeline);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> entry point; forwards to the ConvParams/RuntimeShape Conv.
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
+                 int32 input_offset, const uint8* filter_data,
+                 const Dims<4>& filter_dims, int32 filter_offset,
+                 const int32* bias_data, const Dims<4>& bias_dims,
+                 int stride_width, int stride_height, int dilation_width_factor,
+                 int dilation_height_factor, int pad_width, int pad_height,
+                 int32 output_offset, int32 output_multiplier, int output_shift,
+                 int32 output_activation_min, int32 output_activation_max,
+                 uint8* output_data, const Dims<4>& output_dims,
+                 uint8* im2col_data, const Dims<4>& im2col_dims,
+                 gemmlowp::GemmContext* gemm_context) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+  op_params.dilation_width_factor = dilation_width_factor;
+  op_params.dilation_height_factor = dilation_height_factor;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims),
+       filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+       output_data, DimsToShape(im2col_dims), im2col_data, gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
                  int32 input_offset, const uint8* filter_data,
                  const Dims<4>& filter_dims, int32 filter_offset,
@@ -2184,6 +2357,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template 
 inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
@@ -2213,6 +2387,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template 
 void Conv(const uint8* input_data, const Dims<4>& input_dims,
@@ -2236,13 +2411,14 @@ void Conv(const uint8* input_data, const Dims<4>& input_dims,
        im2col_data, im2col_dims, gemm_context);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template 
 void Im2col(const T* input_data, const Dims<4>& input_dims, int stride,
             int pad_width, int pad_height, int kheight, int kwidth,
-            uint8 byte_zero, T* output_data, const Dims<4>& output_dims) {
+            uint8 zero_byte, T* output_data, const Dims<4>& output_dims) {
   Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight,
-         kwidth, byte_zero, output_data, output_dims);
+         kwidth, zero_byte, output_data, output_dims);
 }
 
 // legacy, for compatibility with old checked-in code
@@ -2266,6 +2442,7 @@ void ConvAsGemm(const float* input_data, const Dims<4>& input_dims,
                                        output_dims);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template 
 void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims,
@@ -5832,58 +6009,45 @@ void Maximum(const RuntimeShape& input1_shape, const T* input1_data,
 }
 
 template 
-void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
-                     const Dims<4>& filter_dims, int stride_width,
-                     int stride_height, int pad_width, int pad_height,
-                     const Dims<4>& output_dims, uint8 zero_byte,
-                     T* im2col_data) {
+void TransposeIm2col(const ConvParams& params, uint8 zero_byte,
+                     const RuntimeShape& input_shape, const T* input_data,
+                     const RuntimeShape& filter_shape,
+                     const RuntimeShape& output_shape, T* im2col_data) {
   gemmlowp::ScopedProfilingLabel label("TransposeIm2col");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   TFLITE_DCHECK(im2col_data);
 
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 3);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
-  MatchingArraySize(output_dims, 0, filter_dims, 0);  // output_depth
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 0);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  MatchingDim(output_shape, 3, filter_shape, 3);  // output_depth
 
   // Construct the MxN sized im2col matrix.
   // The rows M, are sub-ordered B x H x W
-  Dims<4> row_dims;
-  row_dims.sizes[0] = output_width;
-  row_dims.sizes[1] = output_height;
-  row_dims.sizes[2] = batches;
-  row_dims.sizes[3] = 1;
-  ComputeStrides(&row_dims);
-
+  const RuntimeShape row_shape({1, batches, output_height, output_width});
   // The columns, N, are sub-ordered Kh x Kw x Din
-  Dims<4> col_dims;
-  col_dims.sizes[0] = input_depth;
-  col_dims.sizes[1] = filter_width;
-  col_dims.sizes[2] = filter_height;
-  col_dims.sizes[3] = 1;
-  ComputeStrides(&col_dims);
-
+  const RuntimeShape col_shape({1, filter_height, filter_width, input_depth});
   // Use dimensions M and N to construct dims for indexing directly into im2col
-  Dims<4> im2col_dims;
-  im2col_dims.sizes[0] = FlatSize(col_dims);
-  im2col_dims.sizes[1] = FlatSize(row_dims);
-  im2col_dims.sizes[2] = 1;
-  im2col_dims.sizes[3] = 1;
-  ComputeStrides(&im2col_dims);
+  const RuntimeShape im2col_shape(
+      {1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
 
   // Build the im2col matrix by looping through all the input pixels,
   // computing their influence on the output, rather than looping through all
   // the output pixels. We therefore must initialize the im2col array to zero.
   // This is potentially inefficient because we subsequently overwrite bytes
   // set here. However, in practice memset is very fast and costs negligible.
-  memset(im2col_data, zero_byte, FlatSize(im2col_dims) * sizeof(T));
+  memset(im2col_data, zero_byte, im2col_shape.FlatSize() * sizeof(T));
 
   // Loop through the output batches
   for (int batch = 0; batch < batches; ++batch) {
@@ -5903,11 +6067,11 @@ void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
               if ((out_x >= 0) && (out_x < output_width)) {
                 // Copy the input elements of this pixel
                 T const* src =
-                    input_data + Offset(input_dims, 0, in_x, in_y, batch);
+                    input_data + Offset(input_shape, batch, in_y, in_x, 0);
+                int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+                int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
                 T* dst = im2col_data +
-                         Offset(im2col_dims,
-                                Offset(col_dims, 0, filter_x, filter_y, 0),
-                                Offset(row_dims, out_x, out_y, batch, 0), 0, 0);
+                         Offset(im2col_shape, 0, 0, row_offset, col_offset);
                 memcpy(dst, src, input_depth * sizeof(T));
               }
             }
@@ -5918,31 +6082,71 @@ void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
-inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
-                          const float* filter_data, const Dims<4>& filter_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, float* output_data,
-                          const Dims<4>& output_dims, float* im2col_data,
-                          const Dims<4>& im2col_dims) {
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> entry point; forwards to the ConvParams/RuntimeShape version.
+template <typename T>
+void TransposeIm2col(const T* input_data, const Dims<4>& input_dims,
+                     const Dims<4>& filter_dims, int stride_width,
+                     int stride_height, int pad_width, int pad_height,
+                     const Dims<4>& output_dims, uint8 zero_byte,
+                     T* im2col_data) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data,
+                  DimsToShape(filter_dims), DimsToShape(output_dims),
+                  im2col_data);
+}
+
+inline void TransposeConv(
+    const ConvParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& filter_shape,
+    const float* filter_data, const RuntimeShape& output_shape,
+    float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) {
   gemmlowp::ScopedProfilingLabel label("TransposeConv");
 
   // Note we could use transposed weights with forward conv for unstrided
   // cases. But we are already getting good performance with this code as-is.
   TFLITE_DCHECK(im2col_data);
-  TransposeIm2col(input_data, input_dims, filter_dims, stride_width,
-                  stride_height, pad_width, pad_height, output_dims, 0,
-                  im2col_data);
+  TransposeIm2col(params, 0, input_shape, input_data, filter_shape,
+                  output_shape, im2col_data);
 
   const auto im2col_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(im2col_data, im2col_dims);
+      MapAsMatrixWithLastDimAsRows(im2col_data, im2col_shape);
   const auto filter_matrix_map =
-      MapAsMatrixWithLastDimAsCols(filter_data, filter_dims);
+      MapAsMatrixWithFirstDimAsCols(filter_data, filter_shape);
   auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
 
   Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy Dims<4> entry point; forwards to the ConvParams/RuntimeShape version.
+inline void TransposeConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, float* output_data,
+                          const Dims<4>& output_dims, float* im2col_data,
+                          const Dims<4>& im2col_dims) {
+  tflite::ConvParams op_params;
+  // Padding type is ignored, but still set.
+  op_params.padding_type = PaddingType::kSame;
+  op_params.padding_values.width = pad_width;
+  op_params.padding_values.height = pad_height;
+  op_params.stride_width = stride_width;
+  op_params.stride_height = stride_height;
+
+  TransposeConv(op_params, DimsToShape(input_dims), input_data,
+                DimsToShape(filter_dims), filter_data, DimsToShape(output_dims),
+                output_data, DimsToShape(im2col_dims), im2col_data);
+}
+
 }  // namespace optimized_ops
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index c4c7cf3842..023707d466 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -26,8 +26,8 @@ enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
 enum class PaddingType : uint8 { kNone, kSame, kValid };
 
 struct PaddingValues {
-  int8 width;
-  int8 height;
+  int16 width;
+  int16 height;
 };
 
 // This enumeration allows for non-default formats for the weights array
@@ -734,10 +734,10 @@ struct ConvParams {
   PaddingType padding_type;
   PaddingValues padding_values;
   // TODO(starka): This was just "stride", so check that width+height is OK.
-  int8 stride_width;
-  int8 stride_height;
-  int8 dilation_width_factor;
-  int8 dilation_height_factor;
+  int16 stride_width;
+  int16 stride_height;
+  int16 dilation_width_factor;
+  int16 dilation_height_factor;
   // uint8 inference params.
   // TODO(b/65838351): Use smaller types if appropriate.
   int32 input_offset;
@@ -745,8 +745,12 @@ struct ConvParams {
   int32 output_offset;
   int32 output_multiplier;
   int output_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  // uint8, etc, activation params.
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
 };
 
 struct DepthToSpaceParams {
@@ -756,8 +760,8 @@ struct DepthToSpaceParams {
 struct DepthwiseParams {
   PaddingType padding_type;
   PaddingValues padding_values;
-  int8 stride;
-  int8 depth_multiplier;
+  int16 stride;
+  int16 depth_multiplier;
   // uint8 inference params.
   // TODO(b/65838351): Use smaller types if appropriate.
   int32 input_offset;
@@ -765,8 +769,12 @@ struct DepthwiseParams {
   int32 output_offset;
   int32 output_multiplier;
   int output_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  // uint8, etc, activation params.
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
 };
 
 struct DequantizationParams {
@@ -787,13 +795,17 @@ struct FullyConnectedParams {
   int32 output_offset;
   int32 output_multiplier;
   int output_shift;
-  int32 output_activation_min;
-  int32 output_activation_max;
+  // uint8, etc, activation params.
+  int32 quantized_activation_min;
+  int32 quantized_activation_max;
+  // float activation params.
+  float float_activation_min;
+  float float_activation_max;
   FullyConnectedWeightsFormat weights_format;
 };
 
 struct GatherParams {
-  int8 input_rank;
+  int16 input_rank;
   int16 axis;
 };
 
-- 
GitLab


From 56d4fc8ff67f48294ae5cb0a7f9ff3d954463aa3 Mon Sep 17 00:00:00 2001
From: Mark Daoust 
Date: Thu, 13 Sep 2018 09:47:30 -0700
Subject: [PATCH 0139/1357] Add a `namedtuple` factory that accepts
 doc-strings.

PiperOrigin-RevId: 212828094
---
 tensorflow/python/estimator/model_fn.py       | 93 ++++++++++++++-----
 tensorflow/python/util/collections.py         | 51 ++++++++++
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 4 files changed, 125 insertions(+), 23 deletions(-)
 create mode 100644 tensorflow/python/util/collections.py

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 439cc2e3a4..728de65559 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -33,6 +33,7 @@ from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.util import nest
+from tensorflow.python.util.collections import tf_namedtuple
 from tensorflow.python.util.tf_export import estimator_export
 
 
@@ -62,14 +63,65 @@ EXPORT_TAG_MAP = {
     ModeKeys.EVAL: [tag_constants.EVAL],
 }
 
+# pylint: disable=line-too-long
+
+_EstimatorSpecNamedTuple = tf_namedtuple('EstimatorSpec', [   # pylint: disable=invalid-name
+    ('mode',
+     'A `ModeKeys`. Specifies if this is training, evaluation or prediction.'
+    ),
+    ('predictions', 'Predictions `Tensor` or dict of `Tensor`.'),
+    ('loss',
+     'Training loss `Tensor`. Must be either scalar, or with shape `[1]`.'),
+    ('train_op', 'Op to run one training step.'),
+    ('eval_metric_ops',
+     """Dict of metric results keyed by name.
+
+     The values of the dict are the results of calling a metric function,
+     namely a `(metric_tensor, update_op)` tuple.
+
+     `metric_tensor` should be evaluated without any impact on state
+     (typically is a pure computation results based on variables.).
+     For example, it should not trigger the `update_op` or requires any
+     input fetching."""
+    ),
+    ('export_outputs',
+     """Describes the output signatures to be exported to `SavedModel`.
+
+     A dict `{name: output}` where:
+
+       * `name` is An arbitrary name for this output.
+       * `output` is an `ExportOutput` object such as `ClassificationOutput`,
+         `RegressionOutput`, or `PredictOutput`.
+
+     Single-headed models only need to specify one entry in this dictionary.
+     Multi-headed models should specify one entry for each head, one of
+     which must be named using
+     `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry is
+     provided, a default `PredictOutput` mapping to `predictions` will be
+     created."""
+    ),
+    ('training_chief_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run on the chief worker during training.'
+    ),
+    ('training_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run on all workers during training.'
+    ),
+    ('scaffold',
+     'A `tf.train.Scaffold` object that can be used to set initialization, saver, and more to be used in training.'
+    ),
+    ('evaluation_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run during evaluation.'
+    ),
+    ('prediction_hooks',
+     'Iterable of `tf.train.SessionRunHook` objects to run during predictions.'
+    ),
+])
+
+# pylint: enable=line-too-long
+
 
 @estimator_export('estimator.EstimatorSpec')
-class EstimatorSpec(
-    collections.namedtuple('EstimatorSpec', [
-        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
-        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
-        'evaluation_hooks', 'prediction_hooks'
-    ])):
+class EstimatorSpec(_EstimatorSpecNamedTuple):
   """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
 
   `EstimatorSpec` fully defines the model to be run by an `Estimator`.
@@ -156,23 +208,22 @@ class EstimatorSpec(
         A dict `{name: output}` where:
         * name: An arbitrary name for this output.
         * output: an `ExportOutput` object such as `ClassificationOutput`,
-            `RegressionOutput`, or `PredictOutput`.
-        Single-headed models only need to specify one entry in this dictionary.
-        Multi-headed models should specify one entry for each head, one of
-        which must be named using
-        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
-        If no entry is provided, a default `PredictOutput` mapping to
-        `predictions` will be created.
-      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run on the chief worker during training.
-      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        on all workers during training.
+          `RegressionOutput`, or `PredictOutput`. Single-headed models only need
+          to specify one entry in this dictionary. Multi-headed models should
+          specify one entry for each head, one of which must be named using
+          `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry
+          is provided, a default `PredictOutput` mapping to `predictions` will
+          be created.
+      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        on the chief worker during training.
+      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run on
+        all workers during training.
       scaffold: A `tf.train.Scaffold` object that can be used to set
         initialization, saver, and more to be used in training.
-      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run during evaluation.
-      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to
-        run during predictions.
+      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        during evaluation.
+      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        during predictions.
 
     Returns:
       A validated `EstimatorSpec` object.
diff --git a/tensorflow/python/util/collections.py b/tensorflow/python/util/collections.py
new file mode 100644
index 0000000000..ef5290ee8b
--- /dev/null
+++ b/tensorflow/python/util/collections.py
@@ -0,0 +1,51 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Collections utilities."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+
+def tf_namedtuple(name, fieldnames_and_docs):
+  """A `namedtuple` class factory that supports field-docstrings.
+
+  ```
+  cls = tf_namedtuple("MyNamedTuple", [("a", "Docs for a"),
+                                       ("b", "Docs for b")])
+  cls.a.__doc__  # ==> "Docs for a"
+  ```
+
+  Args:
+    name: The name of the new class.
+    fieldnames_and_docs: A sequence of `(fieldname, docstring)` pairs. The
+      fieldnames are passed to `collections.namedtuple`.
+
+  Returns:
+    A namedtuple class whose field accessors carry the given docstrings.
+  """
+  fieldnames_and_docs = list(fieldnames_and_docs)
+  fieldnames = [fieldname for fieldname, _ in fieldnames_and_docs]
+  cls = collections.namedtuple(name, fieldnames)
+
+  # Build fresh read-only properties rather than copying `fget`/`fset`/`fdel`
+  # from the generated accessors: those are not `property` objects on
+  # Python 3.8+, so reading `.fget` from them raises `AttributeError`.
+  for index, (fieldname, doc) in enumerate(fieldnames_and_docs):
+    getter = lambda self, index=index: tuple.__getitem__(self, index)
+    setattr(cls, fieldname, property(fget=getter, doc=doc))
+  return cls
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
index aa6ac46613..37695572c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: ""
-  is_instance: ""
+  is_instance: ""
   is_instance: ""
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
index aa6ac46613..37695572c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: ""
-  is_instance: ""
+  is_instance: ""
   is_instance: ""
   member {
     name: "eval_metric_ops"
-- 
GitLab


From a9a5929d06e5eb4dd38bef63d56c4e338bbd38a2 Mon Sep 17 00:00:00 2001
From: James Qin 
Date: Thu, 13 Sep 2018 09:50:09 -0700
Subject: [PATCH 0140/1357] Register a new Sum op for T:int64 and Tidx:int32

PiperOrigin-RevId: 212828463
---
 tensorflow/core/kernels/reduction_ops_sum.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index 5318d8c133..e4ca89eca3 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -76,7 +76,15 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("output")
         .HostMemory("reduction_indices"),
     ReductionOp>);
-
+REGISTER_KERNEL_BUILDER(
+    Name("Sum")
+        .Device(DEVICE_GPU)
+        .TypeConstraint("T")
+        .TypeConstraint("Tidx")
+        .HostMemory("input")
+        .HostMemory("output")
+        .HostMemory("reduction_indices"),
+    ReductionOp>);
 #endif
 
 #ifdef TENSORFLOW_USE_SYCL
-- 
GitLab


From c6c6aad47dfb24cf4b5db565f49b59c2d224362b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 09:57:24 -0700
Subject: [PATCH 0141/1357] Removed `contrib.layers` dependency from
 `bucket_by_sequence_length` tests.

PiperOrigin-RevId: 212829466
---
 .../contrib/data/python/kernel_tests/BUILD    |   1 -
 .../python/kernel_tests/bucketing_test.py     | 104 ++++++++++++------
 2 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index 1f947e97f9..b3c90ded39 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -44,7 +44,6 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/contrib/data/python/ops:grouping",
-        "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
index 94718bb477..48971f2ccc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
@@ -21,7 +21,6 @@ import random
 
 import numpy as np
 
-from tensorflow.contrib import layers
 from tensorflow.contrib.data.python.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -537,6 +536,40 @@ def _element_length_fn(x, y=None):
   return array_ops.shape(x)[0]
 
 
+def _to_sparse_tensor(record):
+  return sparse_tensor.SparseTensor(**record)
+
+
+def _format_record(array, sparse):
+  if sparse:
+    return {
+        "values": array,
+        "indices": [[i] for i in range(len(array))],
+        "dense_shape": (len(array),)
+    }
+  return array
+
+
+def _get_record_type(sparse):
+  if sparse:
+    return {
+        "values": dtypes.int64,
+        "indices": dtypes.int64,
+        "dense_shape": dtypes.int64
+    }
+  return dtypes.int32
+
+
+def _get_record_shape(sparse):
+  if sparse:
+    return {
+        "values": tensor_shape.TensorShape([None,]),
+        "indices": tensor_shape.TensorShape([None, 1]),
+        "dense_shape": tensor_shape.TensorShape([1,])
+    }
+  return tensor_shape.TensorShape([None])
+
+
 class BucketBySequenceLength(test.TestCase):
 
   def testBucket(self):
@@ -545,23 +578,28 @@ class BucketBySequenceLength(test.TestCase):
     batch_sizes = [10, 8, 4, 2]
     lengths = [8, 13, 25, 35]
 
-    def element_gen():
-      # Produce 1 batch for each bucket
-      elements = []
-      for batch_size, length in zip(batch_sizes, lengths):
-        record_len = length - 1
-        for _ in range(batch_size):
-          elements.append([1] * record_len)
-          record_len = length
-      random.shuffle(elements)
-      for el in elements:
-        yield (el,)
+    def build_dataset(sparse):
+      def _generator():
+        # Produce 1 batch for each bucket
+        elements = []
+        for batch_size, length in zip(batch_sizes, lengths):
+          record_len = length - 1
+          for _ in range(batch_size):
+            elements.append([1] * record_len)
+            record_len = length
+        random.shuffle(elements)
+        for el in elements:
+          yield (_format_record(el, sparse),)
+      dataset = dataset_ops.Dataset.from_generator(
+          _generator,
+          (_get_record_type(sparse),),
+          (_get_record_shape(sparse),))
+      if sparse:
+        dataset = dataset.map(lambda x: (_to_sparse_tensor(x),))
+      return dataset
 
     def _test_bucket_by_padding(no_padding):
-      dataset = dataset_ops.Dataset.from_generator(
-          element_gen, (dtypes.int64,), ([None],))
-      if no_padding:
-        dataset = dataset.map(lambda x: (layers.dense_to_sparse(x),))
+      dataset = build_dataset(sparse=no_padding)
       dataset = dataset.apply(
           grouping.bucket_by_sequence_length(
               _element_length_fn,
@@ -677,20 +715,23 @@ class BucketBySequenceLength(test.TestCase):
 
   def testTupleElements(self):
 
-    def elements_gen():
-      text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
-      label = [1, 2, 1, 2]
-      for x, y in zip(text, label):
-        yield (x, y)
+    def build_dataset(sparse):
+      def _generator():
+        text = [[1, 2, 3], [3, 4, 5, 6, 7], [1, 2], [8, 9, 0, 2, 3]]
+        label = [1, 2, 1, 2]
+        for x, y in zip(text, label):
+          yield (_format_record(x, sparse), y)
+      dataset = dataset_ops.Dataset.from_generator(
+          generator=_generator,
+          output_types=(_get_record_type(sparse), dtypes.int32),
+          output_shapes=(_get_record_shape(sparse),
+                         tensor_shape.TensorShape([])))
+      if sparse:
+        dataset = dataset.map(lambda x, y: (_to_sparse_tensor(x), y))
+      return dataset
 
     def _test_tuple_elements_by_padding(no_padding):
-      dataset = dataset_ops.Dataset.from_generator(
-          generator=elements_gen,
-          output_shapes=(tensor_shape.TensorShape([None]),
-                         tensor_shape.TensorShape([])),
-          output_types=(dtypes.int32, dtypes.int32))
-      if no_padding:
-        dataset = dataset.map(lambda x, y: (layers.dense_to_sparse(x), y))
+      dataset = build_dataset(sparse=no_padding)
       dataset = dataset.apply(grouping.bucket_by_sequence_length(
           element_length_func=_element_length_fn,
           bucket_batch_sizes=[2, 2, 2],
@@ -727,12 +768,11 @@ class BucketBySequenceLength(test.TestCase):
       input_data = [range(i+1) for i in range(min_len, max_len)]
       def generator_fn():
         for record in input_data:
-          yield record
+          yield _format_record(record, sparse=True)
       dataset = dataset_ops.Dataset.from_generator(
           generator=generator_fn,
-          output_shapes=(tensor_shape.TensorShape([None])),
-          output_types=(dtypes.int64))
-      dataset = dataset.map(lambda x: layers.dense_to_sparse(x, eos_token=-1))
+          output_types=_get_record_type(sparse=True))
+      dataset = dataset.map(_to_sparse_tensor)
       return dataset
 
     def _compute_expected_batches():
-- 
GitLab


From 609a84774dfdbf6b54d91f70bed07f8d01f87a66 Mon Sep 17 00:00:00 2001
From: Asim Shankar 
Date: Thu, 13 Sep 2018 10:01:41 -0700
Subject: [PATCH 0142/1357] Gracefully handle invalid inputs in Split and
 ReverseSequence.

PiperOrigin-RevId: 212830139
---
 tensorflow/core/kernels/reverse_sequence_op.cc | 5 +++--
 tensorflow/core/kernels/split_op.cc            | 7 ++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/reverse_sequence_op.cc b/tensorflow/core/kernels/reverse_sequence_op.cc
index 15a707a9c6..cded417986 100644
--- a/tensorflow/core/kernels/reverse_sequence_op.cc
+++ b/tensorflow/core/kernels/reverse_sequence_op.cc
@@ -64,7 +64,7 @@ void CheckErrors(OpKernelContext* context, int batch_dim, int seq_dim) {
   OP_REQUIRES(context, seq_lens.NumElements() == input.dim_size(batch_dim),
               errors::InvalidArgument("len(seq_lens) != input.dims(", batch_dim,
                                       "), ", "(", seq_lens.NumElements(),
-                                      " vs. ", input.dim_size(batch_dim)));
+                                      " vs. ", input.dim_size(batch_dim), ")"));
 
   for (size_t d = 0; d < seq_lens_vec.size(); ++d) {
     OP_REQUIRES(context, seq_lens_vec[d] >= 0,
@@ -91,7 +91,7 @@ void CheckErrorsGPU(OpKernelContext* context, int batch_dim, int seq_dim) {
   OP_REQUIRES(context, seq_lens.NumElements() == input.dim_size(batch_dim),
               errors::InvalidArgument("len(seq_lens) != input.dims(", batch_dim,
                                       "), ", "(", seq_lens.NumElements(),
-                                      " vs. ", input.dim_size(batch_dim)));
+                                      " vs. ", input.dim_size(batch_dim), ")"));
 }
 
 template <>
@@ -127,6 +127,7 @@ class ReverseSequenceOp : public OpKernel {
     auto seq_lens_t = seq_lens.vec();
 
     CheckErrors(context, batch_dim_, seq_dim_);
+    if (!context->status().ok()) return;
 
     const int input_dims = input.dims();
 
diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc
index 7cc3c532c9..11db72bfa3 100644
--- a/tensorflow/core/kernels/split_op.cc
+++ b/tensorflow/core/kernels/split_op.cc
@@ -49,7 +49,12 @@ class SplitOpBase : public OpKernel {
   void ComputeEasyCases(OpKernelContext* context, bool* done) {
     const Tensor& input = context->input(1);
     const TensorShape& input_shape = input.shape();
-    const int32 split_dim_orig = context->input(0).flat()(0);
+    const Tensor& split_dim_tensor = context->input(0);
+    OP_REQUIRES(
+        context, split_dim_tensor.shape().dims() == 0,
+        errors::InvalidArgument("split_dim must be a scalar but has rank ",
+                                split_dim_tensor.shape().dims()));
+    const int32 split_dim_orig = split_dim_tensor.flat()(0);
     const int32 split_dim =
         split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig;
     const int32 num_split = num_outputs();
-- 
GitLab


From 1050e5dc93cd579607495df6086f3cec2d9aa1f4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 10:24:23 -0700
Subject: [PATCH 0143/1357] Convert more kernel signatures to use runtime
 shapes.

PiperOrigin-RevId: 212834379
---
 .../internal/optimized/optimized_ops.h        | 359 ++++++++++++------
 1 file changed, 250 insertions(+), 109 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index baed8f4993..370ca03c92 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -200,6 +200,8 @@ struct TTypes {
       UnalignedConstMatrix;
 };
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
 // TODO(b/62193649): this function is only needed as long
 // as we have the --variable_batch hack.
 template 
@@ -212,6 +214,18 @@ MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data,
   return MatrixMap(data, rows, cols);
 }
 
+// TODO(b/62193649): this function is only needed as long
+// as we have the --variable_batch hack.
+template 
+MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data,
+                                                   const RuntimeShape& shape,
+                                                   int rows) {
+  const int flatsize = shape.FlatSize();
+  TFLITE_DCHECK_EQ(flatsize % rows, 0);
+  const int cols = flatsize / rows;
+  return MatrixMap(data, rows, cols);
+}
+
 // This is like the template-parameter version, except that the power-of-two is
 // passed as a function parameter. The template version is to be preferred,
 // since some target hardware optimizations depend on the range of the exponent.
@@ -393,21 +407,24 @@ inline void optimized_ops_preload_l1_keep(const uint8* ptr) {
 // to a matrix*vector product. LSTM cells contain a fully-connected node;
 // when quantized, this becomes a special type of GEMV operation where
 // the output is 16bit-quantized, thus needs its own special path.
-inline void GEMVForLstmCell(const uint8* input_data, const Dims<4>& input_dims,
-                            const uint8* weights_data,
-                            const Dims<4>& weights_dims,
-                            uint8 weights_zero_point, const int32* bias_data,
-                            const Dims<4>& bias_dims, int32 accum_multiplier,
-                            int accum_shift, int16* output_data,
-                            const Dims<4>& output_dims) {
+inline void GEMVForLstmCell(const RuntimeShape& input_shape,
+                            const uint8* input_data,
+                            const RuntimeShape& weights_shape,
+                            const uint8* weights_data, uint8 weights_zero_point,
+                            const RuntimeShape& bias_shape,
+                            const int32* bias_data, int32 accum_multiplier,
+                            int accum_shift, const RuntimeShape& output_shape,
+                            int16* output_data) {
   gemmlowp::ScopedProfilingLabel label("GEMVForLstmCell");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_dims, 0), 1);
-  const int input_size = FlatSizeSkipDim(input_dims, 3);
-  const int output_size = MatchingArraySize(weights_dims, 1, output_dims, 0);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  const int output_size = MatchingDim(weights_shape, weights_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   // This special fast path for quantized LSTM cells does not try to support
   // odd sizes that we haven't encountered in any LSTM cell, that would
   // require special code (that would go untested until any LSTM cell
@@ -580,18 +597,21 @@ inline void GEMVForLstmCell(const uint8* input_data, const Dims<4>& input_dims,
 
 #ifdef GEMMLOWP_NEON
 inline void GEMVForLstmCellWithSymmetricRange(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 accum_multiplier,
-    int accum_shift, int16* output_data, const Dims<4>& output_dims) {
+    const RuntimeShape& input_shape, const uint8* input_data,
+    const RuntimeShape& weights_shape, const uint8* weights_data,
+    const RuntimeShape& bias_shape, const int32* bias_data,
+    int32 accum_multiplier, int accum_shift, const RuntimeShape& output_shape,
+    int16* output_data) {
   gemmlowp::ScopedProfilingLabel label("GEMVForLstmCellWithSymmetricRange");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_dims, 0), 1);
-  const int input_size = FlatSizeSkipDim(input_dims, 3);
-  const int output_size = MatchingArraySize(weights_dims, 1, output_dims, 0);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  const int output_size = MatchingDim(weights_shape, weights_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   // This special fast path for quantized LSTM cells does not try to support
   // odd sizes that we haven't encountered in any LSTM cell, that would
   // require special code (that would go untested until any LSTM cell
@@ -867,14 +887,16 @@ inline void GEMVForLstmCellWithSymmetricRange(
 }
 #endif
 
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
-                           const float* weights_data,
-                           const Dims<4>& weights_dims, const float* bias_data,
-                           const Dims<4>& bias_dims,
-                           float output_activation_min,
-                           float output_activation_max, float* output_data,
-                           const Dims<4>& output_dims) {
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const float* input_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& bias_shape,
+    const float* bias_data, const RuntimeShape& output_shape,
+    float* output_data) {
   gemmlowp::ScopedProfilingLabel label("FullyConnected");
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+
   // TODO(b/62193649): this convoluted shape computation (determining
   // input_rows from the weights_dims, then MapAsMatrixWithGivenNumberOfRows)
   // is because the current --variable_batch hack consists in overwriting the
@@ -883,18 +905,38 @@ inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
   // When that is fixed, this should become:
   // const auto input_matrix_map =
   //     MapAsMatrixWithFirstDimAsRows(input_data, input_dims);
-  const int input_rows = ArraySize(weights_dims, 0);
+  const int dims_count = weights_shape.DimensionsCount();
+  const int input_rows = weights_shape.Dims(dims_count - 1);
   const auto input_matrix_map =
-      MapAsMatrixWithGivenNumberOfRows(input_data, input_dims, input_rows);
+      MapAsMatrixWithGivenNumberOfRows(input_data, input_shape, input_rows);
   const auto filter_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(weights_data, weights_dims);
+      MapAsMatrixWithLastDimAsRows(weights_data, weights_shape);
   auto output_matrix_map =
-      MapAsMatrixWithFirstDimAsRows(output_data, output_dims);
+      MapAsMatrixWithLastDimAsRows(output_data, output_shape);
 
   Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map);
-  AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data,
-                                   output_dims, output_activation_min,
-                                   output_activation_max);
+  AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max,
+                                   bias_shape, bias_data, output_shape,
+                                   output_data);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims,
+                           const float* weights_data,
+                           const Dims<4>& weights_dims, const float* bias_data,
+                           const Dims<4>& bias_dims,
+                           float output_activation_min,
+                           float output_activation_max, float* output_data,
+                           const Dims<4>& output_dims) {
+  tflite::FullyConnectedParams op_params;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(weights_dims), weights_data,
+                 DimsToShape(bias_dims), bias_data, DimsToShape(output_dims),
+                 output_data);
 }
 
 // legacy, for compatibility with old checked-in code
@@ -912,20 +954,23 @@ void FullyConnected(const float* input_data, const Dims<4>& input_dims,
 
 #ifdef USE_NEON
 inline void FullyConnectedAsGEMV(
-    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
-    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_offset,
+    const RuntimeShape& input_shape, const uint8* input_data,
+    int32 input_offset, const RuntimeShape& filter_shape,
+    const uint8* filter_data, int32 filter_offset,
+    const RuntimeShape& bias_shape, const int32* bias_data, int32 output_offset,
     int32 output_multiplier, int output_shift, int32 output_activation_min,
-    int32 output_activation_max, uint8* output_data,
-    const Dims<4>& output_dims) {
+    int32 output_activation_max, const RuntimeShape& output_shape,
+    uint8* output_data) {
   gemmlowp::ScopedProfilingLabel label("FullyConnectedAsGEMV/8bit");
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(bias_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(output_dims));
-  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_dims, 0), 1);
-  const int input_size = FlatSizeSkipDim(input_dims, 3);
-  const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(output_shape, output_dim_count - 1), 1);
+  const int input_size = FlatSizeSkipDim(input_shape, 0);
+  const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   static constexpr int kPeel = 4;
   const bool shift_left = (output_shift <= 0);
   for (int k = 0; k < input_size; k += 64) {
@@ -1096,42 +1141,47 @@ struct GemmlowpOutputPipeline {
   }
 };
 
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
-                           int32 input_offset, const uint8* filter_data,
-                           const Dims<4>& filter_dims, int32 filter_offset,
-                           const int32* bias_data, const Dims<4>& bias_dims,
-                           int32 output_offset, int32 output_multiplier,
-                           int output_shift, int32 output_activation_min,
-                           int32 output_activation_max, uint8* output_data,
-                           const Dims<4>& output_dims,
-                           gemmlowp::GemmContext* gemm_context) {
+inline void FullyConnected(
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    uint8* output_data, gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("FullyConnected/8bit");
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = FlatSizeSkipDim(output_dims, 0);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
 #ifdef USE_NEON
-  const int output_size = MatchingArraySize(filter_dims, 1, output_dims, 0);
+  const int output_size = MatchingDim(filter_shape, filter_dim_count - 2,
+                                      output_shape, output_dim_count - 1);
   if (batches == 1 && !(output_size % 4)) {
     return FullyConnectedAsGEMV(
-        input_data, input_dims, input_offset, filter_data, filter_dims,
-        filter_offset, bias_data, bias_dims, output_offset, output_multiplier,
-        output_shift, output_activation_min, output_activation_max, output_data,
-        output_dims);
+        input_shape, input_data, input_offset, filter_shape, filter_data,
+        filter_offset, bias_shape, bias_data, output_offset, output_multiplier,
+        output_shift, output_activation_min, output_activation_max,
+        output_shape, output_data);
   }
 #endif  // USE_NEON
-  const int filter_rows = filter_dims.sizes[1];
-  const int filter_cols = filter_dims.sizes[0];
-  TFLITE_DCHECK_EQ(filter_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(filter_dims.sizes[3], 1);
-  const int output_rows = output_dims.sizes[0];
+  const int filter_rows = filter_shape.Dims(filter_dim_count - 2);
+  const int filter_cols = filter_shape.Dims(filter_dim_count - 1);
+  TFLITE_DCHECK_EQ(filter_shape.FlatSize(), filter_rows * filter_cols);
+  const int output_rows = output_shape.Dims(output_dim_count - 1);
   TFLITE_DCHECK_EQ(output_rows, filter_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1);
-  TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1);
+  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_rows);
 
   gemmlowp::MatrixMap filter_matrix(
       filter_data, output_rows, filter_cols, filter_cols);
@@ -1148,30 +1198,65 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
       input_offset, output_pipeline);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
+                           int32 input_offset, const uint8* filter_data,
+                           const Dims<4>& filter_dims, int32 filter_offset,
+                           const int32* bias_data, const Dims<4>& bias_dims,
+                           int32 output_offset, int32 output_multiplier,
+                           int output_shift, int32 output_activation_min,
+                           int32 output_activation_max, uint8* output_data,
+                           const Dims<4>& output_dims,
+                           gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
 inline void FullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
-    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
-    const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset,
-    int32 output_multiplier, int output_shift, int32 output_activation_min,
-    int32 output_activation_max, int16* output_data, const Dims<4>& output_dims,
-    gemmlowp::GemmContext* gemm_context) {
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& filter_shape,
+    const uint8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data_int32, const RuntimeShape& output_shape,
+    int16* output_data, gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("FullyConnected/Uint8Int16");
+  const int32 input_offset = params.input_offset;
+  const int32 filter_offset = params.weights_offset;
+  const int32 output_offset = params.output_offset;
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
   // This is a copy of the reference implementation. We do not currently have a
   // properly optimized version.
   (void)gemm_context;  // only used in properly optimized code.
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
   TFLITE_DCHECK_EQ(output_offset, 0);
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
 
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = FlatSizeSkipDim(output_dims, 0);
-  const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(filter_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
 
   // Implementation of the fully connected node suited to the inside of an LSTM
   // cell. The operands are 8-bit integers, the accumulators are internally
@@ -1182,17 +1267,17 @@ inline void FullyConnected(
   if (batches == 1 && input_offset == -128 && output_activation_min == -32768 &&
       output_activation_max == 32767) {
     if (filter_offset == -128 && !(output_depth % 4) && !(accum_depth % 64)) {
-      GEMVForLstmCellWithSymmetricRange(input_data, input_dims, filter_data,
-                                        filter_dims, bias_data_int32, bias_dims,
-                                        output_multiplier, -output_shift,
-                                        output_data, output_dims);
+      GEMVForLstmCellWithSymmetricRange(
+          input_shape, input_data, filter_shape, filter_data, bias_shape,
+          bias_data_int32, output_multiplier, -output_shift, output_shape,
+          output_data);
       return;
     }
     if (!(output_depth % 4) && !(accum_depth % 8)) {
-      GEMVForLstmCell(input_data, input_dims, filter_data, filter_dims,
-                      filter_offset, bias_data_int32, bias_dims,
-                      output_multiplier, -output_shift, output_data,
-                      output_dims);
+      GEMVForLstmCell(input_shape, input_data, filter_shape, filter_data,
+                      filter_offset, bias_shape, bias_data_int32,
+                      output_multiplier, -output_shift, output_shape,
+                      output_data);
       return;
     }
   }
@@ -1226,6 +1311,31 @@ inline void FullyConnected(
       input_offset, output_pipeline);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void FullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
+    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
+    const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset,
+    int32 output_multiplier, int output_shift, int32 output_activation_min,
+    int32 output_activation_max, int16* output_data, const Dims<4>& output_dims,
+    gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  FullyConnected(op_params, DimsToShape(input_dims), input_data,
+                 DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims),
+                 bias_data_int32, DimsToShape(output_dims), output_data,
+                 gemm_context);
+}
+
+// TODO(b/80418076): Move to legacy ops file, update invocations.
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
 void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
@@ -1568,26 +1678,34 @@ struct ShuffledFullyConnectedWorkerTask : gemmlowp::Task {
 };
 
 inline void ShuffledFullyConnected(
-    const uint8* input_data, const Dims<4>& input_dims,
-    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
-    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
-    int output_shift, int32 output_activation_min, int32 output_activation_max,
-    int16* output_data, const Dims<4>& output_dims,
-    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+    const FullyConnectedParams& params, const RuntimeShape& input_shape,
+    const uint8* input_data, const RuntimeShape& weights_shape,
+    const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape,
+    int16* output_data, uint8* shuffled_input_workspace_data,
+    gemmlowp::GemmContext* gemm_context) {
   gemmlowp::ScopedProfilingLabel label("ShuffledFullyConnected/8bit");
+  const int32 output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32 output_activation_min = params.quantized_activation_min;
+  const int32 output_activation_max = params.quantized_activation_max;
   (void)gemm_context;  // only used in optimized code.
   TFLITE_DCHECK_EQ(output_activation_min, -32768);
   TFLITE_DCHECK_EQ(output_activation_max, 32767);
+  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
   // TODO(benoitjacob): This really should be:
   //     const int batches = ArraySize(output_dims, 1);
   // but the current --variable_batch hack consists in overwriting the 3rd
   // dimension with the runtime batch size, as we don't keep track for each
   // array of which dimension is the batch dimension in it.
-  const int batches = FlatSizeSkipDim(output_dims, 0);
-  const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0);
-  const int accum_depth = ArraySize(weights_dims, 0);
-  TFLITE_DCHECK(IsPackedWithoutStrides(input_dims));
-  TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims));
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
+                                       output_shape, output_dim_count - 1);
+  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
   TFLITE_DCHECK((accum_depth % 16) == 0);
   TFLITE_DCHECK((output_depth % 4) == 0);
   // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
@@ -1684,6 +1802,28 @@ inline void ShuffledFullyConnected(
   gemm_context->workers_pool()->Execute(tasks);
 }
 
+// TODO(b/80418076): Move to legacy ops file, update invocations.
+// Legacy.
+inline void ShuffledFullyConnected(
+    const uint8* input_data, const Dims<4>& input_dims,
+    const uint8* shuffled_weights_data, const Dims<4>& weights_dims,
+    const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier,
+    int output_shift, int32 output_activation_min, int32 output_activation_max,
+    int16* output_data, const Dims<4>& output_dims,
+    uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) {
+  tflite::FullyConnectedParams op_params;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+
+  ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data,
+                         DimsToShape(weights_dims), shuffled_weights_data,
+                         DimsToShape(bias_dims), bias_data,
+                         DimsToShape(output_dims), output_data,
+                         shuffled_input_workspace_data, gemm_context);
+}
+
 template <typename T>
 inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w,
                                          int h, int b, int kheight, int kwidth,
@@ -3635,10 +3775,11 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
   bool gemm_already_performed = false;
 #ifdef GEMMLOWP_NEON
   if (fc_batches == 1 && !(fc_output_depth % 4) && !(fc_accum_depth % 8)) {
-    GEMVForLstmCell(concat_temp_data_uint8, concat_temp_dims,
-                    weights_data_uint8, weights_dims, weights_zero_point,
-                    bias_data_int32, bias_dims, accum_multiplier, accum_shift,
-                    activ_temp_data_int16, activ_temp_dims);
+    GEMVForLstmCell(DimsToShape(concat_temp_dims), concat_temp_data_uint8,
+                    DimsToShape(weights_dims), weights_data_uint8,
+                    weights_zero_point, DimsToShape(bias_dims), bias_data_int32,
+                    accum_multiplier, accum_shift, DimsToShape(activ_temp_dims),
+                    activ_temp_data_int16);
     gemm_already_performed = true;
   }
 #endif
-- 
GitLab


From 685f2832daa7084cd1bf484e8a7bb4333e246428 Mon Sep 17 00:00:00 2001
From: Guangda Lai 
Date: Thu, 13 Sep 2018 10:44:21 -0700
Subject: [PATCH 0144/1357] Add TF-TRT kernels/ops to contrib_kernels and
 contrib_ops_op_lib, so TF serving can use them.

PiperOrigin-RevId: 212838380
---
 tensorflow/contrib/BUILD | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 798f499870..d98a24994c 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -166,7 +166,9 @@ cc_library(
             "//tensorflow/contrib/kinesis:dataset_kernels",
         ],
         "//conditions:default": [],
-    }),
+    }) + if_not_windows([
+        "//tensorflow/contrib/tensorrt:trt_engine_op_kernel",
+    ]),
 )
 
 cc_library(
@@ -203,5 +205,7 @@ cc_library(
             "//tensorflow/contrib/kinesis:dataset_ops_op_lib",
         ],
         "//conditions:default": [],
-    }),
+    }) + if_not_windows([
+        "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib",
+    ]),
 )
-- 
GitLab


From f54856b1448bed24534189e4aa2ebb9d0b4f5b9a Mon Sep 17 00:00:00 2001
From: Anton Dmitriev 
Date: Thu, 13 Sep 2018 18:13:47 +0000
Subject: [PATCH 0145/1357] Apply buildifier changes.

---
 tensorflow/contrib/ignite/BUILD | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD
index 1adc6c6ccc..9393b702d1 100644
--- a/tensorflow/contrib/ignite/BUILD
+++ b/tensorflow/contrib/ignite/BUILD
@@ -6,14 +6,14 @@ exports_files(["LICENSE"])
 
 load(
     "//tensorflow:tensorflow.bzl",
-    "tf_gen_op_wrapper_py",
-    "tf_kernel_library",
+    "if_not_windows",
+    "if_windows",
     "tf_custom_op_library",
     "tf_custom_op_py_library",
     "tf_gen_op_libs",
+    "tf_gen_op_wrapper_py",
+    "tf_kernel_library",
     "tf_py_test",
-    "if_not_windows",
-    "if_windows",
 )
 
 py_library(
@@ -55,15 +55,15 @@ cc_library(
     ]) + if_windows([
         "kernels/ignite_plain_client_windows.cc",
     ]),
+    copts = if_windows([
+        "-DWIN32_LEAN_AND_MEAN",
+    ]),
     deps = [
         "//tensorflow/core:framework_headers_lib",
         "//third_party/eigen3",
         "@boringssl//:ssl",
         "@protobuf_archive//:protobuf_headers",
     ],
-    copts = if_windows([
-        "-DWIN32_LEAN_AND_MEAN",
-    ]),
     alwayslink = 1,
 )
 
-- 
GitLab


From ee72b6a204232532e64221f1b9db7843ee13c312 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 11:30:45 -0700
Subject: [PATCH 0146/1357] Automated rollback of commit
 56d4fc8ff67f48294ae5cb0a7f9ff3d954463aa3

PiperOrigin-RevId: 212847619
---
 tensorflow/python/estimator/model_fn.py       | 93 +++++--------------
 tensorflow/python/util/collections.py         | 51 ----------
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 ...tensorflow.estimator.-estimator-spec.pbtxt |  2 +-
 4 files changed, 23 insertions(+), 125 deletions(-)
 delete mode 100644 tensorflow/python/util/collections.py

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 728de65559..439cc2e3a4 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -33,7 +33,6 @@ from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.util import nest
-from tensorflow.python.util.collections import tf_namedtuple
 from tensorflow.python.util.tf_export import estimator_export
 
 
@@ -63,65 +62,14 @@ EXPORT_TAG_MAP = {
     ModeKeys.EVAL: [tag_constants.EVAL],
 }
 
-# pylint: disable=line-too-long
-
-_EstimatorSpecNamedTuple = tf_namedtuple('EstimatorSpec', [   # pylint: disable=invalid-name
-    ('mode',
-     'A `ModeKeys`. Specifies if this is training, evaluation or prediction.'
-    ),
-    ('predictions', 'Predictions `Tensor` or dict of `Tensor`.'),
-    ('loss',
-     'Training loss `Tensor`. Must be either scalar, or with shape `[1]`.'),
-    ('train_op', 'Op to run one training step.'),
-    ('eval_metric_ops',
-     """Dict of metric results keyed by name.
-
-     The values of the dict are the results of calling a metric function,
-     namely a `(metric_tensor, update_op)` tuple.
-
-     `metric_tensor` should be evaluated without any impact on state
-     (typically is a pure computation results based on variables.).
-     For example, it should not trigger the `update_op` or requires any
-     input fetching."""
-    ),
-    ('export_outputs',
-     """Describes the output signatures to be exported to `SavedModel`.
-
-     A dict `{name: output}` where:
-
-       * `name` is An arbitrary name for this output.
-       * `output` is an `ExportOutput` object such as `ClassificationOutput`,
-         `RegressionOutput`, or `PredictOutput`.
-
-     Single-headed models only need to specify one entry in this dictionary.
-     Multi-headed models should specify one entry for each head, one of
-     which must be named using
-     `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry is
-     provided, a default `PredictOutput` mapping to `predictions` will be
-     created."""
-    ),
-    ('training_chief_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run on the chief worker during training.'
-    ),
-    ('training_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run on all workers during training.'
-    ),
-    ('scaffold',
-     'A `tf.train.Scaffold` object that can be used to set initialization, saver, and more to be used in training.'
-    ),
-    ('evaluation_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run during evaluation.'
-    ),
-    ('prediction_hooks',
-     'Iterable of `tf.train.SessionRunHook` objects to run during predictions.'
-    ),
-])
-
-# pylint: enable=line-too-long
-
 
 @estimator_export('estimator.EstimatorSpec')
-class EstimatorSpec(_EstimatorSpecNamedTuple):
+class EstimatorSpec(
+    collections.namedtuple('EstimatorSpec', [
+        'mode', 'predictions', 'loss', 'train_op', 'eval_metric_ops',
+        'export_outputs', 'training_chief_hooks', 'training_hooks', 'scaffold',
+        'evaluation_hooks', 'prediction_hooks'
+    ])):
   """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
 
   `EstimatorSpec` fully defines the model to be run by an `Estimator`.
@@ -208,22 +156,23 @@ class EstimatorSpec(_EstimatorSpecNamedTuple):
         A dict `{name: output}` where:
         * name: An arbitrary name for this output.
         * output: an `ExportOutput` object such as `ClassificationOutput`,
-          `RegressionOutput`, or `PredictOutput`. Single-headed models only need
-          to specify one entry in this dictionary. Multi-headed models should
-          specify one entry for each head, one of which must be named using
-          `signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`. If no entry
-          is provided, a default `PredictOutput` mapping to `predictions` will
-          be created.
-      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        on the chief worker during training.
-      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run on
-        all workers during training.
+            `RegressionOutput`, or `PredictOutput`.
+        Single-headed models only need to specify one entry in this dictionary.
+        Multi-headed models should specify one entry for each head, one of
+        which must be named using
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.
+        If no entry is provided, a default `PredictOutput` mapping to
+        `predictions` will be created.
+      training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run on the chief worker during training.
+      training_hooks: Iterable of `tf.train.SessionRunHook` objects to run
+        on all workers during training.
       scaffold: A `tf.train.Scaffold` object that can be used to set
         initialization, saver, and more to be used in training.
-      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        during evaluation.
-      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to run
-        during predictions.
+      evaluation_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run during evaluation.
+      prediction_hooks: Iterable of `tf.train.SessionRunHook` objects to
+        run during predictions.
 
     Returns:
       A validated `EstimatorSpec` object.
diff --git a/tensorflow/python/util/collections.py b/tensorflow/python/util/collections.py
deleted file mode 100644
index ef5290ee8b..0000000000
--- a/tensorflow/python/util/collections.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Collections utilities."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-
-def tf_namedtuple(name, fieldnames_and_docs):
-  """A `namedtuple` class factory that supports field-docstrings.
-
-  ```
-  cls = tf_namedtuple("MyNamedTuple",[("a", "Docs for a"),
-                                      ("b", "Docs for b")])
-  cls.a.__doc__  # ==> "Docs for a"
-  ```
-
-  Args:
-    name: The name of the new class.
-    fieldnames_and_docs: A sequence of `(fieldname, docstring)` pairs. The
-      fieldnames are passed to `collections.namedtuple`.
-
-  Returns:
-    A namedtuple class.
-  """
-  fieldnames_and_docs = list(fieldnames_and_docs)
-  fieldnames = [fieldname for fieldname, doc in fieldnames_and_docs]
-  cls = collections.namedtuple(name, fieldnames)
-
-  for fieldname, doc in fieldnames_and_docs:
-    old_prop = getattr(cls, fieldname)
-    new_prop = property(fget=old_prop.fget, fset=old_prop.fset,
-                        fdel=old_prop.fdel, doc=doc)
-    setattr(cls, fieldname, new_prop)
-
-  return cls
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
index 37695572c8..aa6ac46613 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: ""
-  is_instance: ""
+  is_instance: ""
   is_instance: ""
   member {
     name: "eval_metric_ops"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
index 37695572c8..aa6ac46613 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-estimator-spec.pbtxt
@@ -1,7 +1,7 @@
 path: "tensorflow.estimator.EstimatorSpec"
 tf_class {
   is_instance: ""
-  is_instance: ""
+  is_instance: ""
   is_instance: ""
   member {
     name: "eval_metric_ops"
-- 
GitLab


From edd2ee1f5e06d3c755aa402e2617f82fc49330aa Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 11:31:42 -0700
Subject: [PATCH 0147/1357] Fix the outfeed test and add a test for empty while
 loop body.

PiperOrigin-RevId: 212847779
---
 .../xla/service/hlo_module_dce_test.cc        | 48 +++++++++++++++++--
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
index d025edbb9c..bf66cc6bc3 100644
--- a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
@@ -372,26 +372,64 @@ TEST_F(HloModuleDceTest, WhileWithOutfeed) {
   auto module = ParseHloString(R"(
   HloModule OutfeedLoop
   WhileBody {
-    loop_var.1 = (s32[]) parameter(0)
+    body_param = (s32[]) parameter(0)
     token = token[] after-all()
     constant.2 = s32[] constant(2)
     outfeed_tuple = (s32[]) outfeed(constant.2, token)
-    get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0
+    get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
     constant.1 = s32[] constant(1)
     add = s32[] add(get-tuple-element.1, constant.1)
     ROOT tuple = (s32[]) tuple(add)
   }
   WhileCondition {
-    loop_var.2 = (s32[]) parameter(0)
-    get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0
+    cond_param = (s32[]) parameter(0)
+    get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0
     constant.2 = s32[] constant(10)
     ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2)
   }
   ENTRY SimpleLoop {
     constant.3 = s32[] constant(0)
     tuple.1 = (s32[]) tuple(constant.3)
-    ROOT while = (s32[]) while(tuple.1), condition=WhileCondition,
+    while = (s32[]) while(tuple.1), condition=WhileCondition,
+      body=WhileBody
+    ROOT rtuple = () tuple()
+  })")
+                    .ValueOrDie();
+
+  HloModuleDCE dce;
+  EXPECT_FALSE(dce.Run(module.get()).ValueOrDie());
+  EXPECT_FALSE(WhileBodyHasPassThroughTupleElement(module->entry_computation(),
+                                                   "while", 0));
+}
+
+// Tests that if a loop variable is not referenced outside of a kWhile, the loop
+// variable changes are not elided within the loop body, if the condition
+// computation uses them.
+TEST_F(HloModuleDceTest, WhileWithOnlyLoopVariableBumping) {
+  auto module = ParseHloString(R"(
+  HloModule InfiniteLoop
+  WhileBody {
+    body_param = (s32[], s32[]) parameter(0)
+    get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
+    get-tuple-element.2 = s32[] get-tuple-element(body_param), index=1
+    constant.1 = s32[] constant(1)
+    add = s32[] add(get-tuple-element.1, constant.1)
+    ROOT tuple = (s32[], s32[]) tuple(add, get-tuple-element.2)
+  }
+  WhileCondition {
+    cond_param = (s32[], s32[]) parameter(0)
+    get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0
+    constant.2 = s32[] constant(10)
+    ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2)
+  }
+  ENTRY SimpleLoop {
+    p0 = (s32[]) parameter(0)
+    get-tuple-element.5 = s32[] get-tuple-element(p0), index=0
+    constant.3 = s32[] constant(0)
+    tuple.1 = (s32[], s32[]) tuple(constant.3, get-tuple-element.5)
+    while = (s32[], s32[]) while(tuple.1), condition=WhileCondition,
       body=WhileBody
+    ROOT get-tuple-element.4 = s32[] get-tuple-element(while), index=1
   })")
                     .ValueOrDie();
 
-- 
GitLab


From e40c240642637695de8469441ccf8759c74fb63e Mon Sep 17 00:00:00 2001
From: Rohan Jain 
Date: Thu, 13 Sep 2018 11:40:22 -0700
Subject: [PATCH 0148/1357] Remove the OutOfRangeError checks and stop testing
 iteration to the end of the dataset in PrefetchingOpsV2. There is some
 nondeterminism in the FunctionBufferingResource that will be fixed by the
 MultiDeviceIterator; once we transition to that, we can go back to enabling
 these checks.

PiperOrigin-RevId: 212849405
---
 .../distribute/python/prefetching_ops_v2_test.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
index bb10b546a1..16799104e8 100644
--- a/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
+++ b/tensorflow/contrib/distribute/python/prefetching_ops_v2_test.py
@@ -55,14 +55,14 @@ class PrefetchingOpsV2Test(test.TestCase):
     next_element = iterator.get_next()
 
     output = []
+    # TODO(rohanj): Modify test to go till the end of the dataset when we
+    # switch to MultiDeviceIterator.
     with self.cached_session() as sess:
-      for _ in range(5):
+      for _ in range(4):
         result = sess.run(next_element)
         self.assertEqual(2, len(result))
         output.extend(result)
-      self.assertEquals(set(range(10)), set(output))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+      self.assertEquals(set(range(8)), set(output))
 
   def testPrefetchToTwoDevicesWithReinit(self):
     if not test_util.is_gpu_available():
@@ -75,14 +75,14 @@ class PrefetchingOpsV2Test(test.TestCase):
     iterator = device_dataset.make_initializable_iterator()
     next_element = iterator.get_next()
 
+    # TODO(rohanj): Modify test to go till the end of the dataset when we
+    # switch to MultiDeviceIterator.
     with self.cached_session() as sess:
       sess.run(iterator.initializer)
-      for _ in range(5):
-        sess.run(next_element)
-      with self.assertRaises(errors.OutOfRangeError):
+      for _ in range(4):
         sess.run(next_element)
       sess.run(iterator.initializer)
-      for _ in range(5):
+      for _ in range(4):
         sess.run(next_element)
 
 
-- 
GitLab


From 0fbeac58e098cf0ac8e131617ebb6780e10c9606 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac 
Date: Thu, 13 Sep 2018 11:51:06 -0700
Subject: [PATCH 0149/1357] Prevent an integral division by zero (undefined
 behavior).

PiperOrigin-RevId: 212851417
---
 tensorflow/core/lib/wav/wav_io.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc
index 36d939e061..c536b5688e 100644
--- a/tensorflow/core/lib/wav/wav_io.cc
+++ b/tensorflow/core/lib/wav/wav_io.cc
@@ -232,6 +232,11 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string,
         "Bad audio format for WAV: Expected 1 (PCM), but got", audio_format);
   }
   TF_RETURN_IF_ERROR(ReadValue(wav_string, channel_count, &offset));
+  if (*channel_count < 1) {
+    return errors::InvalidArgument(
+        "Bad number of channels for WAV: Expected at least 1, but got ",
+        *channel_count);
+  }
   TF_RETURN_IF_ERROR(ReadValue(wav_string, sample_rate, &offset));
   uint32 bytes_per_second;
   TF_RETURN_IF_ERROR(ReadValue(wav_string, &bytes_per_second, &offset));
-- 
GitLab


From 49581856c47c2d3d1e81c4b10d9896259f58bae6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 12:13:11 -0700
Subject: [PATCH 0150/1357] Add some debugging checks for the categorical split
 handler. Also use MIN_INT64 for the bias feature accumulation, since
 categorical_feature_with_xyz columns use -1 for out-of-vocabulary features.

PiperOrigin-RevId: 212855656
---
 .../contrib/boosted_trees/kernels/split_handler_ops.cc   | 9 +++++++++
 .../lib/learner/batch/categorical_split_handler.py       | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
index 3b28ed77f3..51e0c2e431 100644
--- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc
@@ -862,6 +862,15 @@ class BuildCategoricalEqualitySplitsOp : public OpKernel {
       auto* equality_split = split_info.mutable_split_node()
                                  ->mutable_categorical_id_binary_split();
       equality_split->set_feature_column(state->feature_column_group_id());
+      CHECK(feature_ids(best_feature_idx, 0) != bias_feature_id)
+          << "Unexpected feature ID selected. "
+          << "Start feature ID: [" << start_index << "] "
+          << feature_ids(start_index, 0) << ", " << feature_ids(start_index, 1)
+          << "\nBest feature ID: [" << best_feature_idx << "] "
+          << feature_ids(best_feature_idx, 0) << ", "
+          << feature_ids(best_feature_idx, 1)
+          << "\nPartition IDS: " << partition_ids(start_index) << "  "
+          << partition_ids(best_feature_idx);
       equality_split->set_feature_id(feature_ids(best_feature_idx, 0));
       auto* left_child = split_info.mutable_left_child();
       auto* right_child = split_info.mutable_right_child();
diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py
index 35d727482b..4da25298cb 100644
--- a/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py
+++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/categorical_split_handler.py
@@ -29,7 +29,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 
-_BIAS_FEATURE_ID = -1
+_BIAS_FEATURE_ID = int(dtypes.int64.min)
 
 
 class EqualitySplitHandler(base_split_handler.BaseSplitHandler):
-- 
GitLab


From 54cac449527a6668d5410b6403c1c54d71a9ba82 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 12:24:35 -0700
Subject: [PATCH 0151/1357] Add root of profile broken down by program to
 Profile proto.

PiperOrigin-RevId: 212857508
---
 tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc |  5 ++---
 tensorflow/contrib/tpu/profiler/op_profile.proto    | 10 ++++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index 98cc31f18d..b4b06a40a2 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -142,9 +142,8 @@ Status WriteTensorboardTPUProfile(const string& logdir, const string& run,
     TF_RETURN_IF_ERROR(DumpTraceToLogDirectory(profile_run_dir, host_prefix,
                                                response.encoded_trace(), os));
   }
-  if (response.has_op_profile() &&
-      (response.op_profile().has_by_program_structure() ||
-       response.op_profile().has_by_category())) {
+  if (response.has_op_profile() && (response.op_profile().has_by_program() ||
+                                    response.op_profile().has_by_category())) {
     TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, host_prefix,
                                                    response.op_profile(), os));
   }
diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
index feb177a7da..68cf510e71 100644
--- a/tensorflow/contrib/tpu/profiler/op_profile.proto
+++ b/tensorflow/contrib/tpu/profiler/op_profile.proto
@@ -4,12 +4,14 @@ package tensorflow.tpu.op_profile;
 
 // Profile is the top-level data that summarizes a program.
 message Profile {
+  reserved 2;
+  reserved "by_program_structure";
+  reserved 3;
+  reserved "per_program";
   // Root of a profile broken down by instruction category.
   Node by_category = 1;
-  // Root of a profile broken down by program structure.
-  Node by_program_structure = 2;
-  // Per program profile, indexed by hlo module name of the program.
-  map<string, Node> per_program = 3;
+  // Root of a profile broken down by program.
+  Node by_program = 4;
 }
 
 // An entry in the profile tree. (An instruction, or set of instructions).
-- 
GitLab


From d860915b0198ddb96f93e9e97a789af156544dc6 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy 
Date: Thu, 13 Sep 2018 12:31:47 -0700
Subject: [PATCH 0152/1357] Move nccl_rewrite.cc back to tf_kernel_library.

PiperOrigin-RevId: 212858590
---
 tensorflow/contrib/nccl/BUILD | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD
index 225025e995..9a9d480260 100644
--- a/tensorflow/contrib/nccl/BUILD
+++ b/tensorflow/contrib/nccl/BUILD
@@ -25,7 +25,7 @@ tf_custom_op_library(
     name = "python/ops/_nccl_ops.so",
     srcs = [
         "ops/nccl_ops.cc",
-    ] + if_cuda(["kernels/nccl_rewrite.cc"]),
+    ],
     gpu_srcs = if_not_windows_cuda([
         "kernels/nccl_manager.cc",
         "kernels/nccl_manager.h",
@@ -74,6 +74,7 @@ tf_kernel_library(
         "kernels/nccl_manager.cc",
         "kernels/nccl_manager.h",
         "kernels/nccl_ops.cc",
+        "kernels/nccl_rewrite.cc",
     ]),
     deps = if_cuda([
         "@local_config_nccl//:nccl",
-- 
GitLab


From f2c23922fc4d977a4fbe4d2353f7b14231d63f6b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 12:49:49 -0700
Subject: [PATCH 0153/1357] Clean ups related to runtime shapes refactoring.

PiperOrigin-RevId: 212861571
---
 .../internal/optimized/optimized_ops.h        | 30 ++++----
 .../internal/reference/reference_ops.h        | 72 ++++++++++---------
 2 files changed, 53 insertions(+), 49 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
index 370ca03c92..659a65a8ea 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h
@@ -2637,9 +2637,9 @@ inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int input_depth = input_shape.Dims(3);
@@ -2678,9 +2678,9 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_depth = output_shape.Dims(3);
@@ -3508,7 +3508,7 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -5760,9 +5760,9 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
   gemmlowp::ScopedProfilingLabel label("ResizeBilinear");
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -5809,9 +5809,9 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
   gemmlowp::ScopedProfilingLabel label("ResizeBilinear");
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -5870,9 +5870,9 @@ inline void BatchToSpaceND(
 
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input1_shape =
+  const RuntimeShape input1_shape =
       RuntimeShape::ExtendedShape(4, unextended_input1_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_width = output_shape.Dims(2);
@@ -5956,8 +5956,10 @@ inline void PadImpl(const tflite::PadParams& op_params,
                     const P* pad_value_ptr, const RuntimeShape& output_shape,
                     T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Pad");
-  RuntimeShape ext_input_shape = RuntimeShape::ExtendedShape(4, input_shape);
-  RuntimeShape ext_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+  const RuntimeShape ext_input_shape =
+      RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
   TFLITE_DCHECK_LE(op_params.left_padding_count, 4);
   TFLITE_DCHECK_LE(op_params.right_padding_count, 4);
 
@@ -6089,7 +6091,7 @@ inline void Slice(const tflite::SliceParams& op_params,
                   const RuntimeShape& input_shape, const T* input_data,
                   const RuntimeShape& output_shape, T* output_data) {
   gemmlowp::ScopedProfilingLabel label("Slice");
-  RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
   // TODO(dkalenichenko): This op only supports 4D tensors or smaller.
   TFLITE_DCHECK_LE(op_params.begin_count, 4);
   TFLITE_DCHECK_LE(op_params.size_count, 4);
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
index 977367026d..66f18ec195 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
@@ -419,9 +419,9 @@ inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
                          T* output_data) {
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int input_depth = input_shape.Dims(3);
@@ -472,9 +472,9 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
                          T* output_data) {
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int input_depth = input_shape.Dims(3);
@@ -1117,7 +1117,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1158,7 +1158,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1200,7 +1200,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1350,7 +1350,7 @@ void BroadcastMul4DSlow(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -1483,7 +1483,7 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params,
   // The input shapes are extended as part of NdArrayDesc initialization.
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
@@ -1579,7 +1579,7 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -1713,7 +1713,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1754,7 +1754,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1818,7 +1818,7 @@ inline void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1858,7 +1858,7 @@ void BroadcastSub4DSlow(const ArithmeticParams& params,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -1897,7 +1897,7 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                       &desc2);
-  RuntimeShape extended_output_shape =
+  const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
@@ -3543,11 +3543,11 @@ inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_size_shape =
+  const RuntimeShape output_size_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
@@ -3606,9 +3606,9 @@ inline void SpaceToBatchND(
     const RuntimeShape& unextended_output_shape, T* output_data) {
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input1_shape =
+  const RuntimeShape input1_shape =
       RuntimeShape::ExtendedShape(4, unextended_input1_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int depth = input1_shape.Dims(3);
@@ -3663,9 +3663,9 @@ inline void BatchToSpaceND(
     const RuntimeShape& unextended_output_shape, T* output_data) {
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input1_shape =
+  const RuntimeShape input1_shape =
       RuntimeShape::ExtendedShape(4, unextended_input1_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_width = output_shape.Dims(2);
@@ -3719,8 +3719,10 @@ inline void PadImpl(const tflite::PadParams& op_params,
                     const RuntimeShape& input_shape, const T* input_data,
                     const P* pad_value_ptr, const RuntimeShape& output_shape,
                     T* output_data) {
-  RuntimeShape ext_input_shape = RuntimeShape::ExtendedShape(4, input_shape);
-  RuntimeShape ext_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+  const RuntimeShape ext_input_shape =
+      RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_output_shape =
+      RuntimeShape::ExtendedShape(4, output_shape);
   TFLITE_DCHECK_LE(op_params.left_padding_count, 4);
   TFLITE_DCHECK_LE(op_params.right_padding_count, 4);
 
@@ -3817,9 +3819,9 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   // Reverse and pad to 4 dimensions because that is what the runtime code
@@ -3915,7 +3917,7 @@ template 
 inline void Slice(const tflite::SliceParams& op_params,
                   const RuntimeShape& input_shape, const T* input_data,
                   const RuntimeShape& output_shape, T* output_data) {
-  RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(4, input_shape);
   // TODO(dkalenichenko): This op only supports 4D tensors or smaller.
   TFLITE_DCHECK_LE(op_params.begin_count, 4);
   TFLITE_DCHECK_LE(op_params.size_count, 4);
@@ -4141,9 +4143,9 @@ inline void Mean(const tflite::MeanParams& op_params,
 
   TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape input_shape =
+  const RuntimeShape input_shape =
       RuntimeShape::ExtendedShape(4, unextended_input_shape);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int output_batch = output_shape.Dims(0);
@@ -4290,7 +4292,7 @@ void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4577,7 +4579,7 @@ inline void BroadcastComparison4DSlowImpl(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4636,7 +4638,7 @@ inline void BroadcastComparison4DSlowWithScaling(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4886,7 +4888,7 @@ inline void BroadcastPow4DSlow(const RuntimeShape& unextended_input1_shape,
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4929,7 +4931,7 @@ inline void BroadcastLogical4DSlow(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
@@ -4968,7 +4970,7 @@ inline void BroadcastBinaryFunction4DSlow(
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  RuntimeShape output_shape =
+  const RuntimeShape output_shape =
       RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   NdArrayDesc<4> desc1;
-- 
GitLab


From 2646bf2d2bfb717c828db6391563b431f760a7d3 Mon Sep 17 00:00:00 2001
From: Nupur Garg 
Date: Thu, 13 Sep 2018 13:08:26 -0700
Subject: [PATCH 0154/1357] Internal change.

PiperOrigin-RevId: 212864677
---
 tensorflow/contrib/lite/python/convert.py     | 43 ++++++++++++++++---
 tensorflow/contrib/lite/python/lite.py        | 11 +++++
 tensorflow/contrib/lite/python/lite_test.py   | 22 ++++++++++
 .../contrib/lite/python/tflite_convert.py     | 11 +++++
 4 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py
index 1c5516ae7c..1f48a826d4 100644
--- a/tensorflow/contrib/lite/python/convert.py
+++ b/tensorflow/contrib/lite/python/convert.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import enum  # pylint: disable=g-bad-import-order
+
 import os as _os
 import platform as _platform
 import subprocess as _subprocess
@@ -30,7 +32,6 @@ from tensorflow.python.platform import resource_loader as _resource_loader
 from tensorflow.python.util import deprecation
 from tensorflow.python.util.lazy_loader import LazyLoader
 
-
 # Lazy load since some of the performance benchmark skylark rules
 # break dependencies.
 _toco_python = LazyLoader(
@@ -52,6 +53,31 @@ if _toco_from_proto_bin and not _os.path.exists(_toco_from_proto_bin):
   _toco_from_proto_bin = "toco_from_protos"
 
 
+class ConverterMode(enum.Enum):
+  """Enum class defining the converters available to generate TFLite models.
+
+  WARNING: Experimental interface, subject to change.
+  """
+  # Convert model using TOCO such that all ops are TensorFlow Lite native ops.
+  #
+  # This is the only supported mode for any models that contain operations that
+  # cannot be resolved in TensorFlow.
+  DEFAULT = "DEFAULT"
+
+  # Convert model using TOCO such that only unsupported operations are
+  # represented as TensorFlow ops.
+  # WARNING: Experimental interface, subject to change.
+  TOCO_EXTENDED = "TOCO_EXTENDED"
+
+  # Convert model using TOCO such that all operations are represented as
+  # TensorFlow ops.
+  # WARNING: Experimental interface, subject to change.
+  TOCO_EXTENDED_ALL = "TOCO_EXTENDED_ALL"
+
+  def __str__(self):
+    return self.value
+
+
 def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str):
   """Convert `input_data_str` according to model and toco parameters.
 
@@ -128,7 +154,8 @@ def build_toco_convert_protos(input_tensors,
                               change_concat_input_ranges=False,
                               post_training_quantize=False,
                               dump_graphviz_dir=None,
-                              dump_graphviz_video=False):
+                              dump_graphviz_video=False,
+                              converter_mode=ConverterMode.DEFAULT):
   """Builds protocol buffers describing a conversion of a model using TOCO.
 
   Typically this is to convert from TensorFlow GraphDef to TFLite, in which
@@ -183,6 +210,8 @@ def build_toco_convert_protos(input_tensors,
       output file. (default None)
     dump_graphviz_video: Boolean indicating whether to dump the graph after
       every graph transformation. (default False)
+    converter_mode: Experimental flag, subject to change. ConverterMode
+      indicating which converter to use. (default ConverterMode.DEFAULT)
 
   Returns:
     model_flags, toco_flags: two protocol buffers describing the conversion
@@ -211,6 +240,11 @@ def build_toco_convert_protos(input_tensors,
   if dump_graphviz_dir:
     toco.dump_graphviz_dir = dump_graphviz_dir
   toco.dump_graphviz_include_video = dump_graphviz_video
+  if converter_mode == ConverterMode.TOCO_EXTENDED:
+    toco.allow_eager_ops = True
+  elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL:
+    toco.allow_eager_ops = True
+    toco.force_eager_ops = True
 
   model = _model_flags_pb2.ModelFlags()
   model.change_concat_input_ranges = change_concat_input_ranges
@@ -301,9 +335,8 @@ def toco_convert_impl(input_data, input_tensors, output_tensors, *args,
   Raises:
     Defined in `build_toco_convert_protos`.
   """
-  model_flags, toco_flags = build_toco_convert_protos(input_tensors,
-                                                      output_tensors,
-                                                      *args, **kwargs)
+  model_flags, toco_flags = build_toco_convert_protos(
+      input_tensors, output_tensors, *args, **kwargs)
   data = toco_convert_protos(model_flags.SerializeToString(),
                              toco_flags.SerializeToString(),
                              input_data.SerializeToString())
diff --git a/tensorflow/contrib/lite/python/lite.py b/tensorflow/contrib/lite/python/lite.py
index 44dfb97b84..2be24455d8 100644
--- a/tensorflow/contrib/lite/python/lite.py
+++ b/tensorflow/contrib/lite/python/lite.py
@@ -40,6 +40,7 @@ from google.protobuf import text_format as _text_format
 from google.protobuf.message import DecodeError
 from tensorflow.contrib.lite.python import lite_constants as constants
 from tensorflow.contrib.lite.python.convert import build_toco_convert_protos  # pylint: disable=unused-import
+from tensorflow.contrib.lite.python.convert import ConverterMode
 from tensorflow.contrib.lite.python.convert import tensor_name as _tensor_name
 from tensorflow.contrib.lite.python.convert import toco_convert  # pylint: disable=unused-import
 from tensorflow.contrib.lite.python.convert import toco_convert_graph_def as _toco_convert_graph_def
@@ -113,6 +114,8 @@ class TocoConverter(object):
       output file. (default None)
     dump_graphviz_video: Boolean indicating whether to dump the graph after
       every graph transformation. (default False)
+    converter_mode: Experimental flag, subject to change. ConverterMode
+      indicating which converter to use. (default ConverterMode.DEFAULT)
 
   Example usage:
 
@@ -179,6 +182,7 @@ class TocoConverter(object):
     self.post_training_quantize = False
     self.dump_graphviz_dir = None
     self.dump_graphviz_video = False
+    self.converter_mode = ConverterMode.DEFAULT
 
     # Attributes are used by models that cannot be loaded into TensorFlow.
     if not self._has_valid_tensors():
@@ -389,6 +393,7 @@ class TocoConverter(object):
       ValueError:
         Input shape is not specified.
         None value for dimension in input_tensor.
+        ConverterMode option is unsupported for the model.
     """
     # Checks dimensions in input tensor.
     if self._has_valid_tensors():
@@ -439,12 +444,18 @@ class TocoConverter(object):
 
     # Converts model.
     if self._has_valid_tensors():
+      converter_kwargs["converter_mode"] = self.converter_mode
       result = _toco_convert_impl(
           input_data=self._graph_def,
           input_tensors=self._input_tensors,
           output_tensors=self._output_tensors,
           **converter_kwargs)
     else:
+      # Graphs without valid tensors cannot be loaded into tf.Session since they
+      # contain TFLite operation(s) that cannot be resolved in TensorFlow.
+      if self.converter_mode != ConverterMode.DEFAULT:
+        raise ValueError("This model can only be converted with the default "
+                         "converter.")
       result = _toco_convert_graph_def(
           input_data=self._graph_def,
           input_arrays_with_shape=self._input_arrays_with_shape,
diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py
index 3f8ea433ff..f112ed5cdd 100644
--- a/tensorflow/contrib/lite/python/lite_test.py
+++ b/tensorflow/contrib/lite/python/lite_test.py
@@ -402,6 +402,28 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     # Ensure that the quantized weights tflite model is smaller.
     self.assertTrue(len(quantized_tflite) < len(float_tflite))
 
+  def testExtendedMode(self):
+    in_tensor = array_ops.placeholder(
+        shape=[1, 16, 16, 3], dtype=dtypes.float32)
+    out_tensor = in_tensor + in_tensor
+    sess = session.Session()
+
+    # Convert model and ensure model is not None.
+    converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor])
+    converter.converter_mode = lite.ConverterMode.TOCO_EXTENDED_ALL
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
+    # Ensures the model contains TensorFlow ops.
+    # TODO(nupurgarg): Check values once there is a Python delegate interface.
+    interpreter = Interpreter(model_content=tflite_model)
+    with self.assertRaises(RuntimeError) as error:
+      interpreter.allocate_tensors()
+    self.assertIn(
+        'Regular TensorFlow ops are not supported by this interpreter. Make '
+        'sure you invoke the Eager delegate before inference.',
+        str(error.exception))
+
 
 class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py
index cc08ed3fe9..c0ff7f37f9 100644
--- a/tensorflow/contrib/lite/python/tflite_convert.py
+++ b/tensorflow/contrib/lite/python/tflite_convert.py
@@ -140,8 +140,11 @@ def _convert_model(flags):
   if flags.change_concat_input_ranges:
     converter.change_concat_input_ranges = (
         flags.change_concat_input_ranges == "TRUE")
+
   if flags.allow_custom_ops:
     converter.allow_custom_ops = flags.allow_custom_ops
+  if flags.converter_mode:
+    converter.converter_mode = flags.converter_mode
 
   if flags.post_training_quantize:
     converter.post_training_quantize = flags.post_training_quantize
@@ -363,6 +366,8 @@ def run_main(_):
       help=("Boolean to change behavior of min/max ranges for inputs and "
             "outputs of the concat operator for quantized models. Changes the "
             "ranges of concat operator overlap when true. (default False)"))
+
+  # Permitted ops flags.
   parser.add_argument(
       "--allow_custom_ops",
       action="store_true",
@@ -371,6 +376,12 @@ def run_main(_):
             "created for any op that is unknown. The developer will need to "
             "provide these to the TensorFlow Lite runtime with a custom "
             "resolver. (default False)"))
+  parser.add_argument(
+      "--converter_mode",
+      type=lite.ConverterMode,
+      choices=list(lite.ConverterMode),
+      help=("Experimental flag, subject to change. ConverterMode indicating "
+            "which converter to use. (default ConverterMode.DEFAULT)"))
 
   # Logging flags.
   parser.add_argument(
-- 
GitLab


From df46916ab0f8aa9fbf45f6847c9216ecc90515a9 Mon Sep 17 00:00:00 2001
From: Scott Zhu 
Date: Thu, 13 Sep 2018 13:54:44 -0700
Subject: [PATCH 0155/1357] Allow user to the pre register a defun function
 into graph without calling it.

PiperOrigin-RevId: 212872452
---
 tensorflow/python/eager/function.py      | 28 +++++++++
 tensorflow/python/eager/function_test.py | 78 ++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 348bf4650f..552ed29f65 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1204,6 +1204,34 @@ class PolymorphicFunction(object):
       return graph_function, (args, kwds)
 
 
+def register(func, *args, **kwargs):
+  """Register the defun function into the graph.
+
+  This won't actually call the function with the inputs, and only put the
+  function definition into graph. Register function with different input param
+  will result into multiple version of functions registered in graph.
+
+  Args:
+    func: the PolymorphicFunction instance that generated by a @defun
+    *args: input arguments for the Python function.
+    **kwargs: input keyword arguments for the Python function.
+
+  Returns:
+    a `Function` object specialized to inputs and execution context.
+
+  Raises:
+    ValueError: When the input function is not a defun wrapped python function.
+  """
+  if not isinstance(func, PolymorphicFunction):
+    raise ValueError("Only defun function is allowed to be registered. "
+                     "Got type: %s" % type(func))
+  concrete_func = func.get_concrete_function(*args, **kwargs)
+  graph = ops.get_default_graph()
+  concrete_func._inference_function.add_to_graph(graph)   # pylint: disable=protected-access
+  # TODO(scottzhu): support concrete_func._backward_graph_function in future.
+  return concrete_func
+
+
 def _validate_signature(signature):
   if any(not isinstance(arg, tensor_spec.TensorSpec)
          for arg in nest.flatten(signature)):
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index d2b1d9c8a7..a0abefe666 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -1607,6 +1607,84 @@ class FunctionTest(test.TestCase):
           t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
           add(t, t)
 
+  def testRegisterFunction(self):
+    @function.defun
+    def add(x, y):
+      return math_ops.add(x, y)
+
+    def matmul(x, y):
+      return math_ops.matmul(x, y)
+    defun_matmul = function.defun(matmul)
+
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        function.register(defun_matmul, t, t)
+        function.register(add, t, t)
+
+        graph = ops.get_default_graph()
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 2)
+        functions = list(graph._functions.values())
+        pre_register_matmul_func_name = functions[0].definition.signature.name
+        self.assertRegexpMatches(pre_register_matmul_func_name, '.*matmul.*')
+        pre_register_add_func_name = functions[1].definition.signature.name
+        self.assertRegexpMatches(pre_register_add_func_name, '.*add.*')
+
+        sq = defun_matmul(t, t)
+        double = add(t, t)
+        self.assertAllEqual(sq.eval().reshape(-1), [7, 10, 15, 22])
+        self.assertAllEqual(double.eval().reshape(-1), [2, 4, 6, 8])
+        # Make sure the pre registered function is used, and no other function
+        # is added.
+        self.assertEqual(len(graph._functions), 2)
+        functions = list(graph._functions.values())
+        called_func_name = functions[0].definition.signature.name
+        self.assertEqual(pre_register_matmul_func_name, called_func_name)
+        called_func_name = functions[1].definition.signature.name
+        self.assertEqual(pre_register_add_func_name, called_func_name)
+
+  def testRegisterFunctionWithInputSignature(self):
+    def matmul(x, y):
+      return math_ops.matmul(x, y)
+    defun_matmul = function.defun(
+        matmul,
+        input_signature=[
+            tensor_spec.TensorSpec(shape=(2, 2), dtype=dtypes.float32),
+            tensor_spec.TensorSpec(shape=(2, 2), dtype=dtypes.float32)
+        ])
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        function.register(defun_matmul, t, t)
+
+        graph = ops.get_default_graph()
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 1)
+
+        # Test input param shape mismatch
+        t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+        with self.assertRaisesRegexp(
+            ValueError, 'Python inputs incompatible with input_signature'):
+          function.register(defun_matmul, t2, t2)
+
+  def testRegisterFunctionWithCache(self):
+    def matmul(x, y):
+      return math_ops.matmul(x, y)
+    defun_matmul = function.defun(matmul)
+
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        t2 = constant_op.constant([[2.0, 3.0], [4.0, 5.0]])
+        function.register(defun_matmul, t, t)
+        function.register(defun_matmul, t2, t2)
+
+        graph = ops.get_default_graph()
+        # Only one function is registered since the inputs are of the same type
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 1)
+
 
 @test_util.with_c_shapes
 class AutomaticControlDependenciesTest(test.TestCase):
-- 
GitLab


From c4c80a3fe7f585748110056dade5748856b34f5c Mon Sep 17 00:00:00 2001
From: Mark Daoust 
Date: Thu, 13 Sep 2018 13:55:35 -0700
Subject: [PATCH 0156/1357] internal change

PiperOrigin-RevId: 212872625
---
 tensorflow/tools/docs/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py
index a6159fa692..83b4bf8128 100644
--- a/tensorflow/tools/docs/parser.py
+++ b/tensorflow/tools/docs/parser.py
@@ -1479,7 +1479,7 @@ class ParserConfig(object):
     self.base_dir = base_dir
     self.defined_in_prefix = 'tensorflow/'
     self.code_url_prefix = (
-        'https://www.tensorflow.org/code/tensorflow/')  # pylint: disable=line-too-long
+        '/code/stable/tensorflow/')  # pylint: disable=line-too-long
 
   def py_name_to_object(self, full_name):
     """Return the Python object for a Python symbol name."""
-- 
GitLab


From 490e46f29dba0254fa69385d4235ab26854868c8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 13:59:02 -0700
Subject: [PATCH 0157/1357] Increase test timeout for xla_ops_test to de-flake.

PiperOrigin-RevId: 212873250
---
 tensorflow/compiler/tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index e7623582f6..2176eaebe4 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1198,7 +1198,7 @@ tf_xla_py_test(
 
 tf_xla_py_test(
     name = "xla_ops_test",
-    size = "small",
+    size = "medium",
     srcs = ["xla_ops_test.py"],
     disabled_backends = ["cpu_ondemand"],
     deps = [
-- 
GitLab


From 304faf0444260912b6996d39227417c09561c37e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 13:59:24 -0700
Subject: [PATCH 0158/1357] Remove tf.contrib.get_signature_def_by_key. This
 can be replaced by meta_graph_def.signature_def[signature_def_key]

PiperOrigin-RevId: 212873314
---
 .../predictor/saved_model_predictor.py        |  19 +-
 tensorflow/contrib/saved_model/BUILD          |  17 --
 tensorflow/contrib/saved_model/__init__.py    |   2 -
 .../python/saved_model/__init__.py            |   1 -
 .../python/saved_model/signature_def_utils.py |  42 ----
 .../saved_model/signature_def_utils_test.py   | 191 ------------------
 tensorflow/python/tools/saved_model_cli.py    |   7 +-
 7 files changed, 9 insertions(+), 270 deletions(-)
 delete mode 100644 tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py
 delete mode 100644 tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py

diff --git a/tensorflow/contrib/predictor/saved_model_predictor.py b/tensorflow/contrib/predictor/saved_model_predictor.py
index 95da6d04ed..03399396df 100644
--- a/tensorflow/contrib/predictor/saved_model_predictor.py
+++ b/tensorflow/contrib/predictor/saved_model_predictor.py
@@ -23,7 +23,6 @@ import logging
 
 from tensorflow.contrib.predictor import predictor
 from tensorflow.contrib.saved_model.python.saved_model import reader
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
 from tensorflow.python.client import session
 from tensorflow.python.framework import ops
 from tensorflow.python.saved_model import loader
@@ -68,23 +67,19 @@ def _get_signature_def(signature_def_key, export_dir, tags):
   metagraph_def = get_meta_graph_def(export_dir, tags)
 
   try:
-    signature_def = signature_def_utils.get_signature_def_by_key(
-        metagraph_def,
+    signature_def = metagraph_def.signature_def[signature_def_key]
+  except KeyError as e:
+    formatted_key = _DEFAULT_INPUT_ALTERNATIVE_FORMAT.format(
         signature_def_key)
-  except ValueError as e:
     try:
-      formatted_key = _DEFAULT_INPUT_ALTERNATIVE_FORMAT.format(
-          signature_def_key)
-      signature_def = signature_def_utils.get_signature_def_by_key(
-          metagraph_def, formatted_key)
-
-      logging.warning('Could not find signature def "%s". '
-                      'Using "%s" instead', signature_def_key, formatted_key)
-    except ValueError:
+      signature_def = metagraph_def.signature_def[formatted_key]
+    except KeyError:
       raise ValueError(
           'Got signature_def_key "{}". Available signatures are {}. '
           'Original error:\n{}'.format(
               signature_def_key, list(metagraph_def.signature_def), e))
+    logging.warning('Could not find signature def "%s". '
+                    'Using "%s" instead', signature_def_key, formatted_key)
   return signature_def
 
 
diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD
index f687b56ea3..4ca5274b2e 100644
--- a/tensorflow/contrib/saved_model/BUILD
+++ b/tensorflow/contrib/saved_model/BUILD
@@ -78,23 +78,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "signature_def_utils_test",
-    size = "small",
-    srcs = ["python/saved_model/signature_def_utils_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":saved_model_py",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python/saved_model:signature_constants",
-        "//tensorflow/python/saved_model:signature_def_utils",
-        "//tensorflow/python/saved_model:utils",
-    ],
-)
-
 py_library(
     name = "keras_saved_model",
     srcs = ["python/saved_model/keras_saved_model.py"],
diff --git a/tensorflow/contrib/saved_model/__init__.py b/tensorflow/contrib/saved_model/__init__.py
index 074dc655ac..ac95e38011 100644
--- a/tensorflow/contrib/saved_model/__init__.py
+++ b/tensorflow/contrib/saved_model/__init__.py
@@ -25,13 +25,11 @@ from __future__ import print_function
 
 # pylint: disable=unused-import,wildcard-import,line-too-long
 from tensorflow.contrib.saved_model.python.saved_model.keras_saved_model import *
-from tensorflow.contrib.saved_model.python.saved_model.signature_def_utils import *
 # pylint: enable=unused-import,wildcard-import,line-too-long
 
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
-    "get_signature_def_by_key",
     "load_keras_model",
     "save_keras_model"]
 
diff --git a/tensorflow/contrib/saved_model/python/saved_model/__init__.py b/tensorflow/contrib/saved_model/python/saved_model/__init__.py
index e3b76bb6f3..fd3dc1d7aa 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/__init__.py
+++ b/tensorflow/contrib/saved_model/python/saved_model/__init__.py
@@ -25,5 +25,4 @@ from __future__ import print_function
 
 # pylint: disable=wildcard-import
 from tensorflow.contrib.saved_model.python.saved_model import keras_saved_model
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
 # pylint: enable=wildcard-import
diff --git a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py b/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py
deleted file mode 100644
index f521647999..0000000000
--- a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""SignatureDef utility functions implementation."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-def get_signature_def_by_key(meta_graph_def, signature_def_key):
-  """Utility function to get a SignatureDef protocol buffer by its key.
-
-  Args:
-    meta_graph_def: MetaGraphDef protocol buffer with the SignatureDefMap to
-      look up.
-    signature_def_key: Key of the SignatureDef protocol buffer to find in the
-      SignatureDefMap.
-
-  Returns:
-    A SignatureDef protocol buffer corresponding to the supplied key, if it
-    exists.
-
-  Raises:
-    ValueError: If no entry corresponding to the supplied key is found in the
-    SignatureDefMap of the MetaGraphDef.
-  """
-  if signature_def_key not in meta_graph_def.signature_def:
-    raise ValueError("No SignatureDef with key '%s' found in MetaGraphDef." %
-                     signature_def_key)
-  return meta_graph_def.signature_def[signature_def_key]
diff --git a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py b/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py
deleted file mode 100644
index d2e14f73e4..0000000000
--- a/tensorflow/contrib/saved_model/python/saved_model/signature_def_utils_test.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for SignatureDef utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils as signature_def_contrib_utils
-from tensorflow.core.protobuf import meta_graph_pb2
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import signature_constants
-from tensorflow.python.saved_model import signature_def_utils
-from tensorflow.python.saved_model import utils
-
-
-class SignatureDefUtilsTest(test.TestCase):
-
-  def _add_to_signature_def_map(self, meta_graph_def, signature_def_map=None):
-    if signature_def_map is not None:
-      for key in signature_def_map:
-        meta_graph_def.signature_def[key].CopyFrom(signature_def_map[key])
-
-  def _check_tensor_info(self, tensor_info_map, map_key, expected_tensor_name):
-    actual_tensor_info = tensor_info_map[map_key]
-    self.assertEqual(expected_tensor_name, actual_tensor_info.name)
-
-  def testGetSignatureDefByKey(self):
-    x = array_ops.placeholder(dtypes.float32, 1, name="x")
-    x_tensor_info = utils.build_tensor_info(x)
-
-    y = array_ops.placeholder(dtypes.float32, name="y")
-    y_tensor_info = utils.build_tensor_info(y)
-
-    foo_signature_def = signature_def_utils.build_signature_def({
-        "foo-input": x_tensor_info
-    }, {"foo-output": y_tensor_info}, "foo-method-name")
-    bar_signature_def = signature_def_utils.build_signature_def({
-        "bar-input": x_tensor_info
-    }, {"bar-output": y_tensor_info}, "bar-method-name")
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(
-        meta_graph_def, {"foo": foo_signature_def,
-                         "bar": bar_signature_def})
-
-    # Look up a key that does not exist in the SignatureDefMap.
-    missing_key = "missing-key"
-    with self.assertRaisesRegexp(
-        ValueError,
-        "No SignatureDef with key '%s' found in MetaGraphDef" % missing_key):
-      signature_def_contrib_utils.get_signature_def_by_key(
-          meta_graph_def, missing_key)
-
-    # Look up the key, `foo` which exists in the SignatureDefMap.
-    foo_signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "foo")
-    self.assertTrue("foo-method-name", foo_signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(foo_signature_def.inputs))
-    self._check_tensor_info(foo_signature_def.inputs, "foo-input", "x:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(1, len(foo_signature_def.outputs))
-    self._check_tensor_info(foo_signature_def.outputs, "foo-output", "y:0")
-
-    # Look up the key, `bar` which exists in the SignatureDefMap.
-    bar_signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "bar")
-    self.assertTrue("bar-method-name", bar_signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(bar_signature_def.inputs))
-    self._check_tensor_info(bar_signature_def.inputs, "bar-input", "x:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(1, len(bar_signature_def.outputs))
-    self._check_tensor_info(bar_signature_def.outputs, "bar-output", "y:0")
-
-  def testGetSignatureDefByKeyRegression(self):
-    input1 = constant_op.constant("a", name="input-1")
-    output1 = constant_op.constant(7.2, name="output-1")
-
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(meta_graph_def, {
-        "my_regression":
-            signature_def_utils.regression_signature_def(input1, output1)
-    })
-
-    # Look up the regression signature with the key used while saving.
-    signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "my_regression")
-
-    # Check the method name to match the constants regression method name.
-    self.assertEqual(signature_constants.REGRESS_METHOD_NAME,
-                     signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(signature_def.inputs))
-    self._check_tensor_info(signature_def.inputs,
-                            signature_constants.REGRESS_INPUTS, "input-1:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(1, len(signature_def.outputs))
-    self._check_tensor_info(signature_def.outputs,
-                            signature_constants.REGRESS_OUTPUTS, "output-1:0")
-
-  def testGetSignatureDefByKeyClassification(self):
-    input1 = constant_op.constant("a", name="input-1")
-    output1 = constant_op.constant("b", name="output-1")
-    output2 = constant_op.constant(3.0, name="output-2")
-
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(meta_graph_def, {
-        "my_classification":
-            signature_def_utils.classification_signature_def(
-                input1, output1, output2)
-    })
-
-    # Look up the classification signature def with the key used while saving.
-    signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "my_classification")
-
-    # Check the method name to match the constants classification method name.
-    self.assertEqual(signature_constants.CLASSIFY_METHOD_NAME,
-                     signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(1, len(signature_def.inputs))
-    self._check_tensor_info(signature_def.inputs,
-                            signature_constants.CLASSIFY_INPUTS, "input-1:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(2, len(signature_def.outputs))
-    self._check_tensor_info(signature_def.outputs,
-                            signature_constants.CLASSIFY_OUTPUT_CLASSES,
-                            "output-1:0")
-    self._check_tensor_info(signature_def.outputs,
-                            signature_constants.CLASSIFY_OUTPUT_SCORES,
-                            "output-2:0")
-
-  def testPredictionSignatureDef(self):
-    input1 = constant_op.constant("a", name="input-1")
-    input2 = constant_op.constant("b", name="input-2")
-    output1 = constant_op.constant("c", name="output-1")
-    output2 = constant_op.constant("d", name="output-2")
-
-    meta_graph_def = meta_graph_pb2.MetaGraphDef()
-    self._add_to_signature_def_map(meta_graph_def, {
-        "my_prediction":
-            signature_def_utils.predict_signature_def({
-                "input-1": input1,
-                "input-2": input2
-            }, {"output-1": output1,
-                "output-2": output2})
-    })
-
-    # Look up the prediction signature def with the key used while saving.
-    signature_def = signature_def_contrib_utils.get_signature_def_by_key(
-        meta_graph_def, "my_prediction")
-    self.assertEqual(signature_constants.PREDICT_METHOD_NAME,
-                     signature_def.method_name)
-
-    # Check inputs in signature def.
-    self.assertEqual(2, len(signature_def.inputs))
-    self._check_tensor_info(signature_def.inputs, "input-1", "input-1:0")
-    self._check_tensor_info(signature_def.inputs, "input-2", "input-2:0")
-
-    # Check outputs in signature def.
-    self.assertEqual(2, len(signature_def.outputs))
-    self._check_tensor_info(signature_def.outputs, "output-1", "output-1:0")
-    self._check_tensor_info(signature_def.outputs, "output-2", "output-2:0")
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index c5289564fe..d8ba13d8d2 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -33,7 +33,6 @@ import numpy as np
 
 from six import integer_types
 from tensorflow.contrib.saved_model.python.saved_model import reader
-from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils
 from tensorflow.core.example import example_pb2
 from tensorflow.core.framework import types_pb2
 from tensorflow.python.client import session
@@ -97,8 +96,7 @@ def _get_inputs_tensor_info_from_meta_graph_def(meta_graph_def,
   Returns:
     A dictionary that maps input tensor keys to TensorInfos.
   """
-  return signature_def_utils.get_signature_def_by_key(meta_graph_def,
-                                                      signature_def_key).inputs
+  return meta_graph_def.signature_def[signature_def_key].inputs
 
 
 def _get_outputs_tensor_info_from_meta_graph_def(meta_graph_def,
@@ -116,8 +114,7 @@ def _get_outputs_tensor_info_from_meta_graph_def(meta_graph_def,
   Returns:
     A dictionary that maps output tensor keys to TensorInfos.
   """
-  return signature_def_utils.get_signature_def_by_key(meta_graph_def,
-                                                      signature_def_key).outputs
+  return meta_graph_def.signature_def[signature_def_key].outputs
 
 
 def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key, indent=0):
-- 
GitLab


From 4053f4d89ee9c8fdd8389c6604347449ced4fabf Mon Sep 17 00:00:00 2001
From: Austin Anderson 
Date: Mon, 10 Sep 2018 14:31:25 -0700
Subject: [PATCH 0159/1357] Add 1.11 release notes (#22067)

---
 RELEASE.md | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/RELEASE.md b/RELEASE.md
index bdc23795e5..2f26623373 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,9 +1,86 @@
+# Release 1.11.0
+
+## Major Features and Improvements
+
+* Nvidia GPU:
+  * Prebuilt binaries are now (as of TensorFlow 1.11) built against cuDNN 7.2 and TensorRT 4. See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support)
+* Google Cloud TPU:
+  * Experimental tf.data integration for Keras on Google Cloud TPUs.
+  * Experimental / preview support for eager execution on Google Cloud TPUs.
+* DistributionStrategy:
+  * Add multi-GPU DistributionStrategy support in tf.keras. Users can now use `fit`, `evaluate` and `predict` to distribute their model on multiple GPUs.
+  * Add multi-worker DistributionStrategy and standalone client support in Estimator. See [README](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute) for more details.
+* Add C, C++, and Python functions for querying kernels
+
+## Breaking Changes
+
+* Keras:
+  * The default values for tf.keras `RandomUniform`, `RandomNormal`, and `TruncatedNormal` initializers have been changed to match those in external Keras.
+  * Breaking change: `model.get_config()` on a Sequential model now returns a config dictionary (consistent with other Model instances) instead of a list of configs for the underlying layers.
+
+## Bug Fixes and Other Changes
+
+* C++:
+  * Changed the signature of SessionFactory::NewSession so that it can return a meaningful error message on failure.
+* tf.data:
+  * Remove `num_parallel_parser_calls` argument from `tf.contrib.data.make_csv_dataset()`.
+  * `tf.data.Dataset.list_files()` raises an exception at initialization time if the argument matches no files.
+  * Renamed BigTable class to BigtableTable for clarity
+  * Document use of the Cloud Bigtable API
+  * Adding `tf.contrib.data.reduce_dataset` which can be used to reduce a dataset to a single element.
+  * Generalization of `tf.contrib.data.sliding_window_batch`.
+* INC:
+  * Runtime improvements to triangular solve.
+* `tf.contrib`:
+  * Add an `implementation` argument to `tf.keras.layers.LocallyConnected2D` and `tf.keras.layers.LocallyConnected1D`. The new mode (`implementation=2`) performs forward pass as a single dense matrix multiplication, allowing dramatic speedups in certain scenarios (but worse performance in others - see docstring). The option also allows to use `padding=same`.
+  * Add documentation clarifying the differences between tf.fill and tf.constant.
+  * Add experimental IndexedDatasets.
+  * Add selective registration target using the lite proto runtime.
+  * Add simple Tensor and DataType classes to TensorFlow Lite Java
+  * Add support for bitcasting to/from uint32 and uint64.
+  * Added a subclass of Estimator that can be created from a SavedModel (SavedModelEstimator).
+  * Adds leaf index modes as an argument.
+  * Allow a different output shape from the input in tf.contrib.image.transform.
+  * Change the state_size order of the StackedRNNCell to be natural order. To keep the existing behavior, user can add reverse_state_order=True when constructing the StackedRNNCells.
+  * Deprecate self.test_session() in favor of self.session() or self.cached_session().
+  * Directly import tensor.proto.h (the transitive import will be removed from tensor.h soon)
+  * Estimator.train() now supports tf.contrib.summary.\* summaries out of the box; each call to .train() will now create a separate tfevents file rather than re-using a shared one.
+  * Fix FTRL L2-shrinkage behavior: the gradient from the L2 shrinkage term should not end up in the accumulator.
+  * Fix toco compilation/execution on Windows
+  * GoogleZoneProvider class added to detect which Google Cloud Engine zone TensorFlow is running in.
+  * It is now safe to call any of the C API's TF_Delete\* functions on nullptr
+  * Log some errors on Android to logcat
+  * Match FakeQuant numerics in TFLite to improve accuracy of TFLite quantized inference models.
+  * Optional bucket location check for the GCS Filesystem.
+  * Performance enhancements for StringSplitOp & StringSplitV2Op.
+  * Performance improvements for regex replace operations.
+  * TFRecordWriter now raises an error if .write() fails.
+  * TPU: More helpful error messages in TPUClusterResolvers.
+  * The legacy_init_op argument to SavedModelBuilder methods for adding MetaGraphs has been deprecated. Please use the equivalent main_op argument instead. As part of this, we now explicitly check for a single main_op or legacy_init_op at the time of SavedModel building, whereas the check on main_op was previously only done at load time.
+  * The protocol used for Estimator training is now configurable in RunConfig.
+  * Triangular solve performance improvements.
+  * Unify RNN cell interface between TF and Keras. Add new get_initial_state() to Keras and TF RNN cell, which will be used to replace the existing zero_state() method.
+  * Update initialization of variables in Keras.
+  * Updates to "constrained_optimization" in tensorflow/contrib.
+  * boosted trees: adding pruning mode
+  * tf.train.Checkpoint does not delete old checkpoints by default.
+  * tfdbg: Limit the total disk space occupied by dumped tensor data to 100 GBytes. Add environment variable `TFDBG_DISK_BYTES_LIMIT` to allow adjustment of this upper limit.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+Aapeli, adoda, Ag Ramesh, Amogh Mannekote, Andrew Gibiansky, Andy Craze, Anirudh Koul, Aurelien Geron, Avijit, Avijit-Nervana, Ben, Benjamin H. Myara, bhack, Brett Koonce, Cao Zongyan, cbockman, cheerss, Chikanaga Tomoyuki, Clayne Robison, cosine0, Cui Wei, Dan J, David, David Norman, Dmitry Klimenkov, Eliel Hojman, Florian Courtial, fo40225, formath, Geoffrey Irving, gracehoney, Grzegorz Pawelczak, Guoliang Hua, Guozhong Zhuang, Herman Zvonimir Došilović, HuiyangFei, Jacker, Jan Hünnemeyer, Jason Taylor, Jason Zaman, Jesse, Jiang,Zhoulong, Jiawei Zhang, Jie, Joe Yearsley, Johannes Schmitz, Jon Perl, Jon Triebenbach, Jonathan, Jonathan Hseu, Jongmin Park, Justin Shenk, karl@kubx.ca, Kate Hodesdon, Kb Sriram, Keishi Hattori, Kenneth Blomqvist, Koan-Sin Tan, Li Liangbin, Li, Yiqiang, Loo Rong Jie, Madiyar, Mahmoud Abuzaina, Mark Ryan, Matt Dodge, mbhuiyan, melvinljy96, Miguel Mota, Nafis Sadat, Nathan Luehr, naurril, Nehal J Wani, Niall Moran, Niranjan Hasabnis, Nishidha Panpaliya, npow, olicht, Pei Zhang, Peng Wang (Simpeng), Peng Yu, Philipp Jund, Pradeep Banavara, Pratik Kalshetti, qwertWZ, Rakesh Chada, Randy West, Ray Kim, Rholais Lii, Robin Richtsfeld, Rodrigo Silveira, Ruizhi, Santosh Kumar, Seb Bro, Sergei Lebedev, sfujiwara, Shaba Abhiram, Shashi, SneakyFish5, Soila Kavulya, Stefan Dyulgerov, Steven Winston, Sunitha Kambhampati, Surry Shome, Taehoon Lee, Thor Johnsen, Tristan Rice, TShapinsky, tucan, tucan9389, Vicente Reyes, Vilmar-Hillow, Vitaly Lavrukhin, wangershi, weidan.kong, weidankong, Wen-Heng (Jack) Chung, William D. Irons, Wim Glenn, XFeiF, Yan Facai (颜发才), Yanbo Liang, Yong Tang, Yoshihiro Yamazaki, Yuan (Terry) Tang, Yuan, Man, zhaoyongke, Áron
+Ricardo Perez-Lopez, 张天启, 张晓飞
+
+
 # Release 1.10.1
 ## Bug Fixes and Other Changes
 
 * `tf.keras`:
   * Fixing keras on Cloud TPUs. No new binaries will be built for Windows.
 
+
 # Release 1.10.0
 
 ## Major Features And Improvements
-- 
GitLab


From d46753f993def43f6c878120e52f5dba598ceae2 Mon Sep 17 00:00:00 2001
From: Austin Anderson 
Date: Tue, 4 Sep 2018 15:48:50 -0700
Subject: [PATCH 0160/1357] Update TF version strings (#22070)

---
 tensorflow/core/public/version.h             | 4 ++--
 tensorflow/tools/docker/Dockerfile.devel     | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +-
 tensorflow/tools/docker/Dockerfile.devel-mkl | 2 +-
 tensorflow/tools/pip_package/setup.py        | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 4129c93af5..1f71e24eeb 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -19,12 +19,12 @@ limitations under the License.
 // TensorFlow uses semantic versioning, see http://semver.org/.
 
 #define TF_MAJOR_VERSION 1
-#define TF_MINOR_VERSION 10
+#define TF_MINOR_VERSION 11
 #define TF_PATCH_VERSION 0
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX ""
+#define TF_VERSION_SUFFIX "-rc0"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 39e7bc8b66..c741e8ad0c 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -78,7 +78,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.10 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # TODO(craigcitro): Don't install the pip package, since it makes it
 # more difficult to experiment with local changes. Instead, just add
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index e487779e7a..f544725af4 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -100,7 +100,7 @@ RUN mkdir /bazel && \
 
 # Download and build TensorFlow.
 WORKDIR /tensorflow
-RUN git clone --branch=r1.10 --depth=1 https://github.com/tensorflow/tensorflow.git .
+RUN git clone --branch=r1.11 --depth=1 https://github.com/tensorflow/tensorflow.git .
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index 371451d2aa..db7c701289 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -3,7 +3,7 @@ FROM ubuntu:16.04
 LABEL maintainer="Clayne Robison "
 
 # These parameters can be overridden by parameterized_docker_build.sh
-ARG TF_BUILD_VERSION=r1.10
+ARG TF_BUILD_VERSION=r1.11
 ARG PYTHON="python"
 ARG PYTHON3_DEV=""
 ARG WHL_DIR="/tmp/pip"
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3102239a19..8442e58f20 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.10.0'
+_VERSION = '1.11.0-rc0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 885cd2942ae7b6239146a3f51ec3d6948ac2b89e Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Thu, 13 Sep 2018 14:17:30 -0700
Subject: [PATCH 0161/1357] No segfault in GradientTape with partially unknown
 shapes.

PiperOrigin-RevId: 212876876
---
 tensorflow/python/eager/pywrap_tfe_src.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc
index 1a8f3577b2..9f2f4e06ad 100644
--- a/tensorflow/python/eager/pywrap_tfe_src.cc
+++ b/tensorflow/python/eager/pywrap_tfe_src.cc
@@ -1403,9 +1403,13 @@ class PyVSpace
     PyObject* arglist =
         Py_BuildValue("(O)", reinterpret_cast<PyObject*>(tensor));
     PyObject* result = PyEval_CallObject(num_elements_, arglist);
+    Py_DECREF(arglist);
+    if (result == nullptr) {
+      // The caller detects whether a python exception has been raised.
+      return -1;
+    }
     tensorflow::int64 r = MakeInt(result);
     Py_DECREF(result);
-    Py_DECREF(arglist);
     return r;
   }
 
-- 
GitLab


From d3458112ad5a1612ec6c77f7de4a0e0ec801e882 Mon Sep 17 00:00:00 2001
From: Rachel Lim 
Date: Thu, 13 Sep 2018 14:18:16 -0700
Subject: [PATCH 0162/1357] Consistency in record_default shapes for
 tf.contrib.data.CsvDataset & tf.decode_csv: - Modify shape assertions so that
 both graph and eager accept rank 0 (scalar) and rank 1 tensors as
 `record_defaults`, and raise an error on other shapes. - Make tests run in
 both graph and eager modes

Fixes #22030.

PiperOrigin-RevId: 212877058
---
 .../contrib/data/kernels/csv_dataset_op.cc    |   3 +
 tensorflow/contrib/data/ops/dataset_ops.cc    |   8 +-
 .../contrib/data/python/kernel_tests/BUILD    |   3 +-
 .../kernel_tests/csv_dataset_op_test.py       | 123 +++++++++++-------
 .../api_def/base_api/api_def_DecodeCSV.pbtxt  |   3 +-
 tensorflow/core/kernels/decode_csv_op.cc      |   3 +
 tensorflow/core/ops/parsing_ops.cc            |   7 +-
 tensorflow/core/ops/parsing_ops_test.cc       |   7 +-
 tensorflow/python/kernel_tests/BUILD          |   3 +
 .../python/kernel_tests/decode_csv_op_test.py |  55 ++++++--
 tensorflow/python/ops/parsing_ops.py          |   3 +-
 11 files changed, 145 insertions(+), 73 deletions(-)

diff --git a/tensorflow/contrib/data/kernels/csv_dataset_op.cc b/tensorflow/contrib/data/kernels/csv_dataset_op.cc
index 74107d5242..21ec50fb6b 100644
--- a/tensorflow/contrib/data/kernels/csv_dataset_op.cc
+++ b/tensorflow/contrib/data/kernels/csv_dataset_op.cc
@@ -49,6 +49,9 @@ class CSVDatasetOp : public DatasetOpKernel {
     OP_REQUIRES_OK(ctx,
                    ctx->input_list("record_defaults", &record_defaults_list));
     for (int i = 0; i < record_defaults_list.size(); ++i) {
+      OP_REQUIRES(ctx, record_defaults_list[i].dims() <= 1,
+                  errors::InvalidArgument(
+                      "Each record default should be at most rank 1"));
       OP_REQUIRES(ctx, record_defaults_list[i].NumElements() < 2,
                   errors::InvalidArgument(
                       "There should only be 1 default per field but field ", i,
diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc b/tensorflow/contrib/data/ops/dataset_ops.cc
index ae104d55bd..ad410e17fe 100644
--- a/tensorflow/contrib/data/ops/dataset_ops.cc
+++ b/tensorflow/contrib/data/ops/dataset_ops.cc
@@ -65,7 +65,13 @@ REGISTER_OP("CSVDataset")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 1, &unused));
       // `record_defaults` must be lists of scalars
       for (size_t i = 8; i < c->num_inputs(); ++i) {
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &unused));
+        shape_inference::ShapeHandle v;
+        TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(i), 1, &v));
+        if (c->Rank(c->input(i)) == 1 && c->Value(c->Dim(v, 0)) > 1) {
+          return errors::InvalidArgument(
+              "Shape of a default must be a length-0 or length-1 vector, or a "
+              "scalar.");
+        }
       }
       return shape_inference::ScalarShape(c);
     });
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index b3c90ded39..ba202839b2 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -72,12 +72,13 @@ py_test(
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:session",
         "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
index 63bffd023f..f8e74e4583 100644
--- a/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/csv_dataset_op_test.py
@@ -31,38 +31,49 @@ from tensorflow.contrib.data.python.ops import error_ops
 from tensorflow.contrib.data.python.ops import readers
 from tensorflow.python.client import session
 from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import googletest
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class CsvDatasetOpTest(test.TestCase):
 
-  def _assert_datasets_equal(self, g, ds1, ds2):
+  def _get_next(self, dataset):
+    # Returns a no argument function whose result is fed to self.evaluate to
+    # yield the next element
+    it = dataset.make_one_shot_iterator()
+    if context.executing_eagerly():
+      return it.get_next
+    else:
+      get_next = it.get_next()
+      return lambda: get_next
+
+  def _assert_datasets_equal(self, ds1, ds2):
     assert ds1.output_shapes == ds2.output_shapes, ('output_shapes differ: %s, '
                                                     '%s') % (ds1.output_shapes,
                                                              ds2.output_shapes)
     assert ds1.output_types == ds2.output_types
     assert ds1.output_classes == ds2.output_classes
-    next1 = ds1.make_one_shot_iterator().get_next()
-    next2 = ds2.make_one_shot_iterator().get_next()
-    with self.session(graph=g) as sess:
-      # Run through datasets and check that outputs match, or errors match.
-      while True:
-        try:
-          op1 = sess.run(next1)
-        except (errors.OutOfRangeError, ValueError) as e:
-          # If op1 throws an exception, check that op2 throws same exception.
-          with self.assertRaises(type(e)):
-            sess.run(next2)
-          break
-        op2 = sess.run(next2)
-        self.assertAllEqual(op1, op2)
+    next1 = self._get_next(ds1)
+    next2 = self._get_next(ds2)
+    # Run through datasets and check that outputs match, or errors match.
+    while True:
+      try:
+        op1 = self.evaluate(next1())
+      except (errors.OutOfRangeError, ValueError) as e:
+        # If op1 throws an exception, check that op2 throws same exception.
+        with self.assertRaises(type(e)):
+          self.evaluate(next2())
+        break
+      op2 = self.evaluate(next2())
+      self.assertAllEqual(op1, op2)
 
   def _setup_files(self, inputs, linebreak='\n', compression_type=None):
     filenames = []
@@ -95,33 +106,32 @@ class CsvDatasetOpTest(test.TestCase):
 
   def _test_by_comparison(self, inputs, **kwargs):
     """Checks that CsvDataset is equiv to TextLineDataset->map(decode_csv)."""
-    with ops.Graph().as_default() as g:
-      dataset_actual, dataset_expected = self._make_test_datasets(
-          inputs, **kwargs)
-      self._assert_datasets_equal(g, dataset_actual, dataset_expected)
+    dataset_actual, dataset_expected = self._make_test_datasets(
+        inputs, **kwargs)
+    self._assert_datasets_equal(dataset_actual, dataset_expected)
 
   def _verify_output_or_err(self,
-                            sess,
                             dataset,
                             expected_output=None,
                             expected_err_re=None):
-    nxt = dataset.make_one_shot_iterator().get_next()
     if expected_err_re is None:
       # Verify that output is expected, without errors
+      nxt = self._get_next(dataset)
       expected_output = [[
           v.encode('utf-8') if isinstance(v, str) else v for v in op
       ] for op in expected_output]
       for value in expected_output:
-        op = sess.run(nxt)
+        op = self.evaluate(nxt())
         self.assertAllEqual(op, value)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(nxt)
+        self.evaluate(nxt())
     else:
       # Verify that OpError is produced as expected
       with self.assertRaisesOpError(expected_err_re):
+        nxt = self._get_next(dataset)
         while True:
           try:
-            sess.run(nxt)
+            self.evaluate(nxt())
           except errors.OutOfRangeError:
             break
 
@@ -137,11 +147,8 @@ class CsvDatasetOpTest(test.TestCase):
     # Convert str type because py3 tf strings are bytestrings
     filenames = self._setup_files(inputs, linebreak, compression_type)
     kwargs['compression_type'] = compression_type
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.CsvDataset(filenames, **kwargs)
-        self._verify_output_or_err(sess, dataset, expected_output,
-                                   expected_err_re)
+    dataset = readers.CsvDataset(filenames, **kwargs)
+    self._verify_output_or_err(dataset, expected_output, expected_err_re)
 
   def testCsvDataset_requiredFields(self):
     record_defaults = [[]] * 4
@@ -191,21 +198,17 @@ class CsvDatasetOpTest(test.TestCase):
     record_defaults = [['']] * 3
     inputs = [['1,"2"3",4', '1,"2"3",4",5,5', 'a,b,"c"d"', 'e,f,g']]
     filenames = self._setup_files(inputs)
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
-        dataset = dataset.apply(error_ops.ignore_errors())
-        self._verify_output_or_err(sess, dataset, [['e', 'f', 'g']])
+    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
+    dataset = dataset.apply(error_ops.ignore_errors())
+    self._verify_output_or_err(dataset, [['e', 'f', 'g']])
 
   def testCsvDataset_ignoreErrWithUnquotedQuotes(self):
     record_defaults = [['']] * 3
     inputs = [['1,2"3,4', 'a,b,c"d', '9,8"7,6,5', 'e,f,g']]
     filenames = self._setup_files(inputs)
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
-        dataset = dataset.apply(error_ops.ignore_errors())
-        self._verify_output_or_err(sess, dataset, [['e', 'f', 'g']])
+    dataset = readers.CsvDataset(filenames, record_defaults=record_defaults)
+    dataset = dataset.apply(error_ops.ignore_errors())
+    self._verify_output_or_err(dataset, [['e', 'f', 'g']])
 
   def testCsvDataset_withNoQuoteDelimAndUnquotedQuotes(self):
     record_defaults = [['']] * 3
@@ -351,10 +354,9 @@ class CsvDatasetOpTest(test.TestCase):
     inputs = [['1,,3,4', '5,6,,8']]
     ds_actual, ds_expected = self._make_test_datasets(
         inputs, record_defaults=record_defaults)
-    with ops.Graph().as_default() as g:
-      self._assert_datasets_equal(g,
-                                  ds_actual.repeat(5).prefetch(1),
-                                  ds_expected.repeat(5).prefetch(1))
+    self._assert_datasets_equal(
+        ds_actual.repeat(5).prefetch(1),
+        ds_expected.repeat(5).prefetch(1))
 
   def testCsvDataset_withTypeDefaults(self):
     # Testing using dtypes as record_defaults for required fields
@@ -373,13 +375,11 @@ class CsvDatasetOpTest(test.TestCase):
     ]]
     file_path = self._setup_files(data)
 
-    with ops.Graph().as_default() as g:
-      ds = readers.make_csv_dataset(
-          file_path, batch_size=1, shuffle=False, num_epochs=1)
-      next_batch = ds.make_one_shot_iterator().get_next()
+    ds = readers.make_csv_dataset(
+        file_path, batch_size=1, shuffle=False, num_epochs=1)
+    nxt = self._get_next(ds)
 
-    with self.session(graph=g) as sess:
-      result = list(sess.run(next_batch).values())
+    result = list(self.evaluate(nxt()).values())
 
     self.assertEqual(result, sorted(result))
 
@@ -542,6 +542,29 @@ class CsvDatasetOpTest(test.TestCase):
         compression_type='ZLIB',
         record_defaults=record_defaults)
 
+  def testCsvDataset_withScalarDefaults(self):
+    record_defaults = [constant_op.constant(0, dtype=dtypes.int64)] * 4
+    inputs = [[',,,', '1,1,1,', ',2,2,2']]
+    self._test_dataset(
+        inputs, [[0, 0, 0, 0], [1, 1, 1, 0], [0, 2, 2, 2]],
+        record_defaults=record_defaults)
+
+  def testCsvDataset_with2DDefaults(self):
+    record_defaults = [constant_op.constant([[0]], dtype=dtypes.int64)] * 4
+    inputs = [[',,,', '1,1,1,', ',2,2,2']]
+
+    if context.executing_eagerly():
+      err_spec = errors.InvalidArgumentError, (
+          'Each record default should be at '
+          'most rank 1.')
+    else:
+      err_spec = ValueError, 'Shape must be at most rank 1 but is rank 2'
+
+    with self.assertRaisesWithPredicateMatch(*err_spec):
+      self._test_dataset(
+          inputs, [[0, 0, 0, 0], [1, 1, 1, 0], [0, 2, 2, 2]],
+          record_defaults=record_defaults)
+
 
 class CsvDatasetBenchmark(test.Benchmark):
   """Benchmarks for the various ways of creating a dataset from CSV files.
diff --git a/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt b/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt
index e39213cbc7..440800704e 100644
--- a/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DecodeCSV.pbtxt
@@ -11,7 +11,8 @@ END
     name: "record_defaults"
     description: <input_list("record_defaults", &record_defaults));
 
     for (int i = 0; i < record_defaults.size(); ++i) {
+      OP_REQUIRES(ctx, record_defaults[i].dims() <= 1,
+                  errors::InvalidArgument(
+                      "Each record default should be at most rank 1"));
       OP_REQUIRES(ctx, record_defaults[i].NumElements() < 2,
                   errors::InvalidArgument(
                       "There should only be 1 default per field but field ", i,
diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index 79ca96d249..eff453241d 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -343,10 +343,11 @@ REGISTER_OP("DecodeCSV")
       // Validate the record_defaults inputs.
       for (int i = 1; i < c->num_inputs(); ++i) {
         ShapeHandle v;
-        TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 1, &v));
-        if (c->Value(c->Dim(v, 0)) > 1) {
+        TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(i), 1, &v));
+        if (c->Rank(c->input(i)) == 1 && c->Value(c->Dim(v, 0)) > 1) {
           return errors::InvalidArgument(
-              "Shape of a default must be a length-0 or length-1 vector");
+              "Shape of a default must be a length-0 or length-1 vector, or a "
+              "scalar.");
         }
       }
 
diff --git a/tensorflow/core/ops/parsing_ops_test.cc b/tensorflow/core/ops/parsing_ops_test.cc
index c65e66d1a8..ba594e400c 100644
--- a/tensorflow/core/ops/parsing_ops_test.cc
+++ b/tensorflow/core/ops/parsing_ops_test.cc
@@ -52,9 +52,12 @@ TEST(ParsingOpsTest, DecodeCSV_ShapeFn) {
   INFER_OK(op, "[1,2,?,4];?;?", "in0;in0");
   INFER_OK(op, "[1,2,?,4];[?];[?]", "in0;in0");
 
+  // Scalar defaults are ok
+  INFER_OK(op, "?;?;[]", "in0;in0");
+
   // Check errors in the record_defaults inputs.
-  INFER_ERROR("must be rank 1", op, "?;?;[]");
-  INFER_ERROR("must be rank 1", op, "?;[];?");
+  INFER_ERROR("must be at most rank 1 but is rank 2", op, "?;?;[1,2]");
+  INFER_ERROR("must be at most rank 1 but is rank 2", op, "?;[3,4];?");
   INFER_ERROR("Shape of a default must be", op, "?;?;[2]");
   INFER_ERROR("Shape of a default must be", op, "?;[2];?");
 }
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index da21ee3043..6bba99b9e7 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -286,7 +286,10 @@ tf_py_test(
     srcs = ["decode_csv_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python/eager:context",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:parsing_ops",
     ],
 )
diff --git a/tensorflow/python/kernel_tests/decode_csv_op_test.py b/tensorflow/python/kernel_tests/decode_csv_op_test.py
index 40b17a11f8..e9307a6b2f 100644
--- a/tensorflow/python/kernel_tests/decode_csv_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_csv_op_test.py
@@ -20,28 +20,30 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class DecodeCSVOpTest(test.TestCase):
 
   def _test(self, args, expected_out=None, expected_err_re=None):
-    with self.cached_session() as sess:
+    if expected_err_re is None:
       decode = parsing_ops.decode_csv(**args)
-
-      if expected_err_re is None:
-        out = sess.run(decode)
-
-        for i, field in enumerate(out):
-          if field.dtype == np.float32 or field.dtype == np.float64:
-            self.assertAllClose(field, expected_out[i])
-          else:
-            self.assertAllEqual(field, expected_out[i])
-
-      else:
-        with self.assertRaisesOpError(expected_err_re):
-          sess.run(decode)
+      out = self.evaluate(decode)
+
+      for i, field in enumerate(out):
+        if field.dtype == np.float32 or field.dtype == np.float64:
+          self.assertAllClose(field, expected_out[i])
+        else:
+          self.assertAllEqual(field, expected_out[i])
+    else:
+      with self.assertRaisesOpError(expected_err_re):
+        decode = parsing_ops.decode_csv(**args)
+        self.evaluate(decode)
 
   def testSimple(self):
     args = {
@@ -53,6 +55,31 @@ class DecodeCSVOpTest(test.TestCase):
 
     self._test(args, expected_out)
 
+  def testSimpleWithScalarDefaults(self):
+    args = {
+        "records": ["1,4", "2,5", "3,6"],
+        "record_defaults": [1, 2],
+    }
+
+    expected_out = [[1, 2, 3], [4, 5, 6]]
+
+    self._test(args, expected_out)
+
+  def testSimpleWith2DDefaults(self):
+    args = {
+        "records": ["1", "2", "3"],
+        "record_defaults": [[[0]]],
+    }
+
+    if context.executing_eagerly():
+      err_spec = errors.InvalidArgumentError, (
+          "Each record default should be at "
+          "most rank 1.")
+    else:
+      err_spec = ValueError, "Shape must be at most rank 1 but is rank 2"
+    with self.assertRaisesWithPredicateMatch(*err_spec):
+      self._test(args)
+
   def testSimpleNoQuoteDelimiter(self):
     args = {
         "records": ["1", "2", '"3"'],
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index 8224097ac4..bb8da3162a 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -1584,7 +1584,8 @@ def decode_csv(records,
     record_defaults: A list of `Tensor` objects with specific types.
       Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
       One tensor per column of the input record, with either a
-      scalar default value for that column or empty if the column is required.
+      scalar default value for that column or an empty vector if the column is
+      required.
     field_delim: An optional `string`. Defaults to `","`.
       char delimiter to separate fields in a record.
     use_quote_delim: An optional `bool`. Defaults to `True`.
-- 
GitLab


From 4999d856d2953aee56fa9759f995038edf3ff566 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 14:31:18 -0700
Subject: [PATCH 0163/1357] Expose tf.contrib.checkpoint.PythonStateWrapper.

This makes it possible to checkpoint arbitrary python state if it can be
serialized to a string.

Also updates NumpyState to accept np.int32, np.int64, np.float32, np.float64
types.

PiperOrigin-RevId: 212879609
---
 tensorflow/contrib/checkpoint/__init__.py     |  2 +
 .../contrib/checkpoint/python/python_state.py | 40 +++++++++++++------
 .../checkpoint/python/python_state_test.py    |  5 +++
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/checkpoint/__init__.py b/tensorflow/contrib/checkpoint/__init__.py
index 150d734db6..94b7f4f867 100644
--- a/tensorflow/contrib/checkpoint/__init__.py
+++ b/tensorflow/contrib/checkpoint/__init__.py
@@ -37,6 +37,7 @@ Checkpoint management:
 
 Saving and restoring Python state:
 @@NumpyState
+@@PythonStateWrapper
 """
 
 from __future__ import absolute_import
@@ -45,6 +46,7 @@ from __future__ import print_function
 
 from tensorflow.contrib.checkpoint.python.containers import UniqueNameTracker
 from tensorflow.contrib.checkpoint.python.python_state import NumpyState
+from tensorflow.contrib.checkpoint.python.python_state import PythonStateWrapper
 from tensorflow.contrib.checkpoint.python.split_dependency import split_dependency
 from tensorflow.contrib.checkpoint.python.visualize import dot_graph_from_checkpoint
 from tensorflow.core.protobuf.checkpointable_object_graph_pb2 import CheckpointableObjectGraph
diff --git a/tensorflow/contrib/checkpoint/python/python_state.py b/tensorflow/contrib/checkpoint/python/python_state.py
index 9b11035b6d..302d5cfb79 100644
--- a/tensorflow/contrib/checkpoint/python/python_state.py
+++ b/tensorflow/contrib/checkpoint/python/python_state.py
@@ -17,7 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import abc
 import functools
+import six
 
 import numpy
 
@@ -101,7 +103,7 @@ class NumpyState(base.CheckpointableBase):
     # TODO(allenl): Consider supporting lists/tuples, either ad-hoc or by making
     # ndarrays checkpointable natively and using standard checkpointable list
     # tracking.
-    if isinstance(value, numpy.ndarray):
+    if isinstance(value, (numpy.ndarray, numpy.generic)):
       try:
         existing = super(NumpyState, self).__getattribute__(name)
         existing.array = value
@@ -127,7 +129,29 @@ class NumpyState(base.CheckpointableBase):
     super(NumpyState, self).__setattr__(name, value)
 
 
-class _NumpyWrapper(base.CheckpointableBase):
+@six.add_metaclass(abc.ABCMeta)
+class PythonStateWrapper(base.CheckpointableBase):
+  """Wraps a Python object for storage in an object-based checkpoint."""
+
+  @abc.abstractmethod
+  def _serialize(self):
+    """Callback for `PythonStringStateSaveable` to serialize the object."""
+
+  @abc.abstractmethod
+  def _deserialize(self, string_value):
+    """Callback for `PythonStringStateSaveable` to deserialize the object."""
+
+  def _gather_saveables_for_checkpoint(self):
+    """Specify callbacks for saving and restoring the wrapped Python state."""
+    return {
+        "py_state": functools.partial(
+            base.PythonStringStateSaveable,
+            state_callback=self._serialize,
+            restore_callback=self._deserialize)
+        }
+
+
+class _NumpyWrapper(PythonStateWrapper):
   """Wraps a NumPy array for storage in an object-based checkpoint."""
 
   def __init__(self, array):
@@ -139,7 +163,7 @@ class _NumpyWrapper(base.CheckpointableBase):
     self.array = array
 
   def _serialize(self):
-    """Callback for `PythonStringStateSaveable` to serialize the array."""
+    """Callback to serialize the array."""
     string_file = BytesIO()
     try:
       numpy.save(string_file, self.array, allow_pickle=False)
@@ -149,18 +173,10 @@ class _NumpyWrapper(base.CheckpointableBase):
     return serialized
 
   def _deserialize(self, string_value):
-    """Callback for `PythonStringStateSaveable` to deserialize the array."""
+    """Callback to deserialize the array."""
     string_file = BytesIO(string_value)
     try:
       self.array = numpy.load(string_file, allow_pickle=False)
     finally:
       string_file.close()
 
-  def _gather_saveables_for_checkpoint(self):
-    """Specify callbacks for saving and restoring `array`."""
-    return {
-        "array": functools.partial(
-            base.PythonStringStateSaveable,
-            state_callback=self._serialize,
-            restore_callback=self._deserialize)
-        }
diff --git a/tensorflow/contrib/checkpoint/python/python_state_test.py b/tensorflow/contrib/checkpoint/python/python_state_test.py
index 0439a4755e..45494351ff 100644
--- a/tensorflow/contrib/checkpoint/python/python_state_test.py
+++ b/tensorflow/contrib/checkpoint/python/python_state_test.py
@@ -40,10 +40,13 @@ class NumpyStateTests(test.TestCase):
     save_state.a = numpy.ones([2, 2])
     save_state.b = numpy.ones([2, 2])
     save_state.b = numpy.zeros([2, 2])
+    save_state.c = numpy.int64(3)
     self.assertAllEqual(numpy.ones([2, 2]), save_state.a)
     self.assertAllEqual(numpy.zeros([2, 2]), save_state.b)
+    self.assertEqual(3, save_state.c)
     first_save_path = saver.save(prefix)
     save_state.a[1, 1] = 2.
+    save_state.c = numpy.int64(4)
     second_save_path = saver.save(prefix)
 
     load_state = python_state.NumpyState()
@@ -51,6 +54,7 @@ class NumpyStateTests(test.TestCase):
     loader.restore(first_save_path).initialize_or_restore()
     self.assertAllEqual(numpy.ones([2, 2]), load_state.a)
     self.assertAllEqual(numpy.zeros([2, 2]), load_state.b)
+    self.assertEqual(3, load_state.c)
     load_state.a[0, 0] = 42.
     self.assertAllEqual([[42., 1.], [1., 1.]], load_state.a)
     loader.restore(first_save_path).run_restore_ops()
@@ -58,6 +62,7 @@ class NumpyStateTests(test.TestCase):
     loader.restore(second_save_path).run_restore_ops()
     self.assertAllEqual([[1., 1.], [1., 2.]], load_state.a)
     self.assertAllEqual(numpy.zeros([2, 2]), load_state.b)
+    self.assertEqual(4, load_state.c)
 
   def testNoGraphPollution(self):
     graph = ops.Graph()
-- 
GitLab


From 25d8c732dcf7fa82d086c5da46408838fa0f04f1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 14:53:31 -0700
Subject: [PATCH 0164/1357] Add ability to skip serializing selected tensors in
 interpreter serializer.

PiperOrigin-RevId: 212883697
---
 .../contrib/lite/experimental/writer/writer_lib.cc | 14 ++++++++++----
 .../contrib/lite/experimental/writer/writer_lib.h  |  7 ++++++-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/experimental/writer/writer_lib.cc b/tensorflow/contrib/lite/experimental/writer/writer_lib.cc
index 52b17faf82..555a9cc4b0 100644
--- a/tensorflow/contrib/lite/experimental/writer/writer_lib.cc
+++ b/tensorflow/contrib/lite/experimental/writer/writer_lib.cc
@@ -117,6 +117,8 @@ Offset<Vector<Offset<Operator>>> InterpreterWriter::ExportOperators(
 
 Offset<Vector<Offset<Tensor>>> InterpreterWriter::ExportTensors(
     FlatBufferBuilder* fbb) {
+  // Initialized to -1.
+  // A value of -1 means this tensor will not be exported.
   tensor_to_written_tensor_.resize(interpreter_->tensors_size(), -1);
 
   std::vector<Offset<Tensor>> tensors;
@@ -135,15 +137,17 @@ Offset<Vector<Offset<Tensor>>> InterpreterWriter::ExportTensors(
   int curr_output_index = 0;
   for (int tensor_index = 0; tensor_index < interpreter_->tensors_size();
        tensor_index++) {
-    if (!tensor_is_temporary[tensor_index]) {
+    // Temporary tensors and unused tensors will not be written.
+    if (!tensor_is_temporary[tensor_index] &&
+        unused_tensors_.find(tensor_index) == unused_tensors_.end()) {
       tensor_to_written_tensor_[tensor_index] = curr_output_index++;
     }
   }
 
   for (int tensor_index = 0; tensor_index < interpreter_->tensors_size();
        ++tensor_index) {
-    // Skip temporaries.
-    if (tensor_is_temporary[tensor_index]) continue;
+    // Tensor not exported.
+    if (tensor_to_written_tensor_[tensor_index] == -1) continue;
 
     if (TfLiteTensor* tensor = interpreter_->tensor(tensor_index)) {
       // We only need to convert non temporaries
@@ -215,7 +219,9 @@ std::vector<int> InterpreterWriter::RemapTensorIndicesToWritten(
   std::vector<int> output;
   output.reserve(input.size());
   for (int x : input) {
-    output.push_back(tensor_to_written_tensor_[x]);
+    if (tensor_to_written_tensor_[x] != -1) {
+      output.push_back(tensor_to_written_tensor_[x]);
+    }
   }
   return output;
 }
diff --git a/tensorflow/contrib/lite/experimental/writer/writer_lib.h b/tensorflow/contrib/lite/experimental/writer/writer_lib.h
index a98108b496..a5f14697cf 100644
--- a/tensorflow/contrib/lite/experimental/writer/writer_lib.h
+++ b/tensorflow/contrib/lite/experimental/writer/writer_lib.h
@@ -62,6 +62,10 @@ class InterpreterWriter {
   // caller to change the custom data.
   TfLiteStatus RegisterCustomWriter(const std::string& custom_name,
                                     CustomWriter custom_writer);
+  // Tensors that are unused and shouldn't be written.
+  void SetUnusedTensors(const std::set& unused_tensors) {
+    unused_tensors_ = unused_tensors;
+  }
 
  private:
   template 
@@ -111,8 +115,9 @@ class InterpreterWriter {
     int builtin;
     std::string custom;
   };
+  std::set unused_tensors_;
   // For every tensor index in the interpreter, the index in the written.
-  // This is different due to temporary tensors not being written.
+  // This is different due to temporary and unused tensors not being written.
   std::vector tensor_to_written_tensor_;
   // List of used opcodes
   std::vector opcodes_;
-- 
GitLab


From 51d72a7d7f74784b68916819edd04e890b36f957 Mon Sep 17 00:00:00 2001
From: "(David) Siu-Kei Muk" 
Date: Fri, 14 Sep 2018 05:59:05 +0800
Subject: [PATCH 0165/1357] Modified "_check_is_tensor_or_operation" to check
 if "x" is "tensor_like"

---
 tensorflow/python/estimator/model_fn.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 439cc2e3a4..331a9d1a05 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -26,6 +26,7 @@ import six
 from tensorflow.python.estimator.export import export_output as export_output_lib
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras.metrics import Metric
 from tensorflow.python.ops import array_ops
 from tensorflow.python.saved_model import signature_constants
@@ -466,7 +467,7 @@ class _TPUEstimatorSpec(
 
 
 def _check_is_tensor_or_operation(x, name):
-  if not (isinstance(x, ops.Operation) or isinstance(x, ops.Tensor)):
+  if not (isinstance(x, ops.Operation) or tensor_util.is_tensor(x)):
     raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x))
 
 
-- 
GitLab


From e8af4e1bb9496c111530e88263fb1b8dac8bdde9 Mon Sep 17 00:00:00 2001
From: Mark Daoust 
Date: Thu, 13 Sep 2018 14:59:51 -0700
Subject: [PATCH 0166/1357] Convert "post training quant" tutorial to a
 notebook.

PiperOrigin-RevId: 212884746
---
 .../lite/tutorials/post_training_quant.ipynb  | 702 ++++++++++++++++++
 1 file changed, 702 insertions(+)
 create mode 100644 tensorflow/contrib/lite/tutorials/post_training_quant.ipynb

diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
new file mode 100644
index 0000000000..a96e2c4e1b
--- /dev/null
+++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb
@@ -0,0 +1,702 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "6Y8E0lw5eYWm"
+      },
+      "source": [
+        "# Post Training Quantization"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "CIGrZZPTZVeO"
+      },
+      "source": [
+        "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "  \u003ctd\u003e\n",
+        "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+        "  \u003c/td\u003e\n",
+        "\u003c/table\u003e"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "BTC1rDAuei_1"
+      },
+      "source": [
+        "## Overview\n",
+        "\n",
+        "[TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) now supports\n",
+        "converting weights to 8 bit precision as part of model conversion from\n",
+        "tensorflow graphdefs to TFLite's flat buffer format. Weight quantization\n",
+        "achieves a 4x reduction in the model size. In addition, TFLite supports on the\n",
+        "fly quantization and dequantization of activations to allow for:\n",
+        "\n",
+        "1.  Using quantized kernels for faster implementation when available.\n",
+        "\n",
+        "2.  Mixing of floating-point kernels with quantized kernels for different parts\n",
+        "    of the graph.\n",
+        "\n",
+        "Note that the activations are always stored in floating point. For ops that\n",
+        "support quantized kernels, the activations are quantized to 8 bits of precision\n",
+        "dynamically prior to processing and are de-quantized to float precision after\n",
+        "processing. Depending on the model being converted, this can give a speedup over\n",
+        "pure floating point computation.\n",
+        "\n",
+        "In contrast to\n",
+        "[quantization aware training](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/quantize),\n",
+        "the weights are quantized post training and the activations are quantized dynamically\n",
+        "at inference in this method.\n",
+        "Therefore, the model weights are not retrained to compensate for quantization\n",
+        "induced errors. It is important to check the accuracy of the quantized model to\n",
+        "ensure that the degradation is acceptable.\n",
+        "\n",
+        "In this tutorial, we train an MNIST model from scratch, check its accuracy in\n",
+        "TensorFlow and then convert the saved model into a TensorFlow Lite flatbuffer\n",
+        "with weight quantization. We finally check the\n",
+        "accuracy of the converted model and compare it to the original saved model. We\n",
+        "run the training script mnist.py from the\n",
+        "[TensorFlow official MNIST tutorial](https://github.com/tensorflow/models/tree/master/official/mnist).\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "2XsEP17Zelz9"
+      },
+      "source": [
+        "## Building an MNIST model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "dDqqUIZjZjac"
+      },
+      "source": [
+        "### Setup"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "gyqAw1M9lyab"
+      },
+      "outputs": [],
+      "source": [
+        "! pip uninstall -y tensorflow\n",
+        "! pip install -U tf-nightly"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "WsN6s5L1ieNl"
+      },
+      "outputs": [],
+      "source": [
+        "import tensorflow as tf\n",
+        "tf.enable_eager_execution()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "00U0taBoe-w7"
+      },
+      "outputs": [],
+      "source": [
+        "! git clone --depth 1 https://github.com/tensorflow/models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "4XZPtSh-fUOc"
+      },
+      "outputs": [],
+      "source": [
+        "import sys\n",
+        "import os\n",
+        "\n",
+        "if sys.version_info.major \u003e= 3:\n",
+        "    import pathlib\n",
+        "else:\n",
+        "    import pathlib2 as pathlib\n",
+        "\n",
+        "# Add `models` to the python path.\n",
+        "models_path = os.path.join(os.getcwd(), \"models\")\n",
+        "sys.path.append(models_path)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "eQ6Q0qqKZogR"
+      },
+      "source": [
+        "### Train and export the model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "eMsw_6HujaqM"
+      },
+      "outputs": [],
+      "source": [
+        "saved_models_root = \"/tmp/mnist_saved_model\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "hWSAjQWagIHl"
+      },
+      "outputs": [],
+      "source": [
+        "# The above path addition is not visible to subprocesses, add the path for the subprocess as well.\n",
+        "# Note: channels_last is required here or the conversion may fail. \n",
+        "!PYTHONPATH={models_path} python models/official/mnist/mnist.py --train_epochs=1 --export_dir {saved_models_root} --data_format=channels_last"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "5NMaNZQCkW9X"
+      },
+      "source": [
+        "For the example, we only trained the model for a single epoch, so it only trains to ~96% accuracy.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "xl8_fzVAZwOh"
+      },
+      "source": [
+        "### Convert to a TFLite model\n",
+        "\n",
+        "The `savedmodel` directory is named with a timestamp. Select the most recent one: "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Xp5oClaZkbtn"
+      },
+      "outputs": [],
+      "source": [
+        "saved_model_dir = str(sorted(pathlib.Path(saved_models_root).glob(\"*\"))[-1])\n",
+        "saved_model_dir"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "AT8BgkKmljOy"
+      },
+      "source": [
+        "Using the python `TocoConverter`, the saved model can be converted into a TFLite model.\n",
+        "\n",
+        "First load the model using the `TocoConverter`:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "_i8B2nDZmAgQ"
+      },
+      "outputs": [],
+      "source": [
+        "import tensorflow as tf\n",
+        "tf.enable_eager_execution()\n",
+        "converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir)\n",
+        "tflite_model = converter.convert()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "F2o2ZfF0aiCx"
+      },
+      "source": [
+        "Write it out to a tflite file:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "vptWZq2xnclo"
+      },
+      "outputs": [],
+      "source": [
+        "tflite_models_dir = pathlib.Path(\"/tmp/mnist_tflite_models/\")\n",
+        "tflite_models_dir.mkdir(exist_ok=True, parents=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Ie9pQaQrn5ue"
+      },
+      "outputs": [],
+      "source": [
+        "tflite_model_file = tflite_models_dir/\"mnist_model.tflite\"\n",
+        "tflite_model_file.write_bytes(tflite_model)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "7BONhYtYocQY"
+      },
+      "source": [
+        "To quantize the model on export, set the `post_training_quantize` flag:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "g8PUvLWDlmmz"
+      },
+      "outputs": [],
+      "source": [
+        "# Note: If you don't have a recent tf-nightly installed, the\n",
+        "# \"post_training_quantize\" line will have no effect.\n",
+        "tf.logging.set_verbosity(tf.logging.INFO)\n",
+        "converter.post_training_quantize = True\n",
+        "tflite_quant_model = converter.convert()\n",
+        "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant.tflite\"\n",
+        "tflite_model_quant_file.write_bytes(tflite_quant_model)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "PhMmUTl4sbkz"
+      },
+      "source": [
+        "Note how the resulting file, with `post_training_quantize` set, is approximately `1/4` the size."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "JExfcfLDscu4"
+      },
+      "outputs": [],
+      "source": [
+        "!ls -lh {tflite_models_dir}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "L8lQHMp_asCq"
+      },
+      "source": [
+        "## Run the TFLite models"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "-5l6-ciItvX6"
+      },
+      "source": [
+        "We can run the TensorFlow Lite model using the python TensorFlow Lite\n",
+        "Interpreter. \n",
+        "\n",
+        "### Load the test data\n",
+        "\n",
+        "First let's load the mnist test data to feed to it:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "eTIuU07NuKFL"
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np\n",
+        "mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()\n",
+        "images, labels = tf.to_float(mnist_test[0])/255.0, mnist_test[1]\n",
+        "\n",
+        "# Note: If you change the batch size, then use \n",
+        "# `tf.contrib.lite.Interpreter.resize_tensor_input` to also change it for\n",
+        "# the interpreter.\n",
+        "mnist_ds = tf.data.Dataset.from_tensor_slices((images, labels)).batch(1)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "Ap_jE7QRvhPf"
+      },
+      "source": [
+        "### Load the model into an interpreter"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Jn16Rc23zTss"
+      },
+      "outputs": [],
+      "source": [
+        "interpreter = tf.contrib.lite.Interpreter(model_path=str(tflite_model_file))\n",
+        "interpreter.allocate_tensors()\n",
+        "input_index = interpreter.get_input_details()[0][\"index\"]\n",
+        "output_index = interpreter.get_output_details()[0][\"index\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "J8Pztk1mvNVL"
+      },
+      "outputs": [],
+      "source": [
+        "tf.logging.set_verbosity(tf.logging.DEBUG)\n",
+        "interpreter_quant = tf.contrib.lite.Interpreter(model_path=str(tflite_model_quant_file))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "Afl6yGvWyqAr"
+      },
+      "outputs": [],
+      "source": [
+        "interpreter_quant.allocate_tensors()\n",
+        "input_index = interpreter_quant.get_input_details()[0][\"index\"]\n",
+        "output_index = interpreter_quant.get_output_details()[0][\"index\"]\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "2opUt_JTdyEu"
+      },
+      "source": [
+        "### Test the model on one image"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "AKslvo2kwWac"
+      },
+      "outputs": [],
+      "source": [
+        "for img, label in mnist_ds.take(1):\n",
+        "  break\n",
+        "\n",
+        "interpreter.set_tensor(input_index, img)\n",
+        "interpreter.invoke()\n",
+        "predictions = interpreter.get_tensor(output_index)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "XZClM2vo3_bm"
+      },
+      "outputs": [],
+      "source": [
+        "import matplotlib.pylab as plt\n",
+        "\n",
+        "plt.imshow(img[0])\n",
+        "template = \"True:{true}, predicted:{predict}\"\n",
+        "_ = plt.title(template.format(true= str(label[0].numpy()),\n",
+        "                              predict=str(predictions[0,0])))\n",
+        "plt.grid(False)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "LwN7uIdCd8Gw"
+      },
+      "source": [
+        "### Evaluate the models"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "05aeAuWjvjPx"
+      },
+      "outputs": [],
+      "source": [
+        "def eval_model(interpreter, mnist_ds):\n",
+        "  total_seen = 0\n",
+        "  num_correct = 0\n",
+        "\n",
+        "  for img, label in mnist_ds:\n",
+        "    total_seen += 1\n",
+        "    interpreter.set_tensor(input_index, img)\n",
+        "    interpreter.invoke()\n",
+        "    predictions = interpreter.get_tensor(output_index)\n",
+        "    if predictions == label.numpy():\n",
+        "      num_correct += 1\n",
+        "\n",
+        "    if total_seen % 500 == 0:\n",
+        "        print(\"Accuracy after %i images: %f\" %\n",
+        "              (total_seen, float(num_correct) / float(total_seen)))\n",
+        "\n",
+        "  return float(num_correct) / float(total_seen)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "DqXBnDfJ7qxL"
+      },
+      "outputs": [],
+      "source": [
+        "print(eval_model(interpreter, mnist_ds))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "Km3cY9ry8ZlG"
+      },
+      "source": [
+        "We can repeat the evaluation on the weight quantized model to obtain:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "-9cnwiPp6EGm"
+      },
+      "outputs": [],
+      "source": [
+        "print(eval_model(interpreter_quant, mnist_ds))\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "L7lfxkor8pgv"
+      },
+      "source": [
+        "\n",
+        "In this example, the compressed model shows no difference in accuracy."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "M0o1FtmWeKZm"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "## Optimizing an existing model\n",
+        "\n",
+        "We now consider another example. Resnets with pre-activation layers (Resnet-v2) are widely used for vision applications.\n",
+        "  A pre-trained frozen graph for resnet-v2-101 is available at the\n",
+        "  [TensorFlow Lite model repository](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/g3doc/models.md).\n",
+        "\n",
+        "We can convert the frozen graph to a TFLite flatbuffer with quantization by:\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "v5p5VcNPjILQ"
+      },
+      "outputs": [],
+      "source": [
+        "archive_path = tf.keras.utils.get_file(\"resnet_v2_101.tgz\", \"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz\", extract=True)\n",
+        "archive_path = pathlib.Path(archive_path)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "-sxnXQuC4ThD"
+      },
+      "source": [
+        "The `info.txt` file lists the input and output names. You can also find them using TensorBoard to visually inspect the graph."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "g_Q_OMEJ4LIc"
+      },
+      "outputs": [],
+      "source": [
+        "! cat {archive_path}/resnet_v2_101_299_info.txt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "ujCAFhqm-C6H"
+      },
+      "outputs": [],
+      "source": [
+        "graph_def_file = pathlib.Path(archive_path).parent/\"resnet_v2_101_299_frozen.pb\"\n",
+        "input_arrays = [\"input\"] \n",
+        "output_arrays = [\"output\"]\n",
+        "converter = tf.contrib.lite.TocoConverter.from_frozen_graph(\n",
+        "  str(graph_def_file), input_arrays, output_arrays, input_shapes={\"input\":[1,299,299,3]})\n",
+        "converter.post_training_quantize = True\n",
+        "resnet_tflite_file = graph_def_file.parent/\"resnet_v2_101_quantized.tflite\"\n",
+        "resnet_tflite_file.write_bytes(converter.convert())\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 0,
+      "metadata": {
+        "colab": {},
+        "colab_type": "code",
+        "id": "vhOjeg1x9Knp"
+      },
+      "outputs": [],
+      "source": [
+        "archive_dir = str(archive_path.parent)\n",
+        "!ls -lh {archive_dir}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "colab_type": "text",
+        "id": "qqHLaqFMCjRZ"
+      },
+      "source": [
+        "\n",
+        "The model size reduces from 171 MB to 43 MB.\n",
+        "The accuracy of this model on imagenet can be evaluated using the scripts provided for [TFLite accuracy measurement](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/accuracy/ilsvrc).\n",
+        "\n",
+        "The optimized model's top-1 accuracy is 76.8%, the same as the floating point model."
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "collapsed_sections": [],
+      "name": "post-training-quant.ipynb",
+      "private_outputs": true,
+      "provenance": [],
+      "toc_visible": true,
+      "version": "0.3.2"
+    },
+    "kernelspec": {
+      "display_name": "Python 2",
+      "name": "python2"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
-- 
GitLab


From fb50c8e9a3cb2ccfac9cf4a847d5841cba80b524 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar 
Date: Thu, 13 Sep 2018 15:01:08 -0700
Subject: [PATCH 0167/1357] Dilated Depthwise Conv reference implementations.

PiperOrigin-RevId: 212884951
---
 tensorflow/contrib/lite/c/builtin_op_data.h   |   7 ++
 .../lite/core/api/flatbuffer_conversions.cc   |   3 +
 .../contrib/lite/kernels/depthwise_conv.cc    |  61 ++++++---
 .../lite/kernels/depthwise_conv_test.cc       | 116 +++++++++++++++++-
 .../internal/optimized/depthwiseconv_float.h  |  20 +++
 .../internal/optimized/depthwiseconv_uint8.h  |  24 ++++
 .../internal/reference/depthwiseconv_float.h  |  24 +++-
 .../internal/reference/depthwiseconv_uint8.h  |  28 ++++-
 tensorflow/contrib/lite/schema/schema.fbs     |   4 +
 .../contrib/lite/schema/schema_generated.h    |  38 +++++-
 .../contrib/lite/testing/generate_examples.py |   2 +
 tensorflow/contrib/lite/toco/model.h          |   5 +
 .../contrib/lite/toco/tflite/operator.cc      |  14 ++-
 13 files changed, 314 insertions(+), 32 deletions(-)

diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h
index fa43e6a024..be9d551ee4 100644
--- a/tensorflow/contrib/lite/c/builtin_op_data.h
+++ b/tensorflow/contrib/lite/c/builtin_op_data.h
@@ -25,6 +25,9 @@ extern "C" {
 
 // TODO(aselle): Consider using "if this then that" for testing.
 
+// IMPORTANT: All new members of structs must be added at the end to ensure
+// backwards compatibility.
+
 // Possible padding types (for convolutions)
 typedef enum {
   kTfLitePaddingUnknown = 0,
@@ -71,11 +74,15 @@ typedef struct {
 } TfLitePoolParams;
 
 typedef struct {
+  // Parameters for DepthwiseConv version 1 or above.
   TfLitePadding padding;
   int stride_width;
   int stride_height;
   int depth_multiplier;
   TfLiteFusedActivation activation;
+  // Parameters for DepthwiseConv version 2 or above.
+  int dilation_width_factor;
+  int dilation_height_factor;
 } TfLiteDepthwiseConvParams;
 
 typedef struct {
diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
index eef4b6d831..f4d2839b1b 100644
--- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc
@@ -216,6 +216,9 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
         params->depth_multiplier = conv_params->depth_multiplier();
         params->activation =
             parse_activation(conv_params->fused_activation_function());
+
+        params->dilation_width_factor = conv_params->dilation_w_factor();
+        params->dilation_height_factor = conv_params->dilation_h_factor();
       }
       *builtin_data = reinterpret_cast(params);
       break;
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
index 347515f289..3e1ce60113 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc
@@ -126,23 +126,28 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // Matching GetWindowedOutputSize in TensorFlow.
   auto padding = params->padding;
-  auto compute_out_size = [padding](int imageSize, int filterSize,
-                                    int stride) -> int {
+  auto compute_out_size = [padding](int image_size, int filter_size, int stride,
+                                    int dilation_rate) -> int {
+    int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
     return padding == kTfLitePaddingSame
-               ? (imageSize + stride - 1) / stride
+               ? (image_size + stride - 1) / stride
                : padding == kTfLitePaddingValid
-                     ? (imageSize - filterSize + stride) / stride
+                     ? (image_size - effective_filter_size + stride) / stride
                      : 0;
   };
 
-  int out_width = compute_out_size(width, filter_width, params->stride_width);
+  int out_width = compute_out_size(width, filter_width, params->stride_width,
+                                   params->dilation_width_factor);
   int out_height =
-      compute_out_size(height, filter_height, params->stride_height);
+      compute_out_size(height, filter_height, params->stride_height,
+                       params->dilation_height_factor);
 
-  data->padding.height = ComputePadding(params->stride_height, 1, height,
-                                        filter_height, out_height);
+  data->padding.height =
+      ComputePadding(params->stride_height, params->dilation_height_factor,
+                     height, filter_height, out_height);
   data->padding.width =
-      ComputePadding(params->stride_width, 1, width, filter_width, out_width);
+      ComputePadding(params->stride_width, params->dilation_width_factor, width,
+                     filter_width, out_width);
 
   // Note that quantized inference requires that all tensors have their
   // parameters set. This is usually done during quantized training.
@@ -177,8 +182,19 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
 
   void (*depthwise_conv)(const float*, const Dims<4>&, const float*,
                          const Dims<4>&, const float*, const Dims<4>&, int, int,
-                         int, int, int, float, float, float*, const Dims<4>&);
-  if (kernel_type == kReference) {
+                         int, int, int, int, int, float, float, float*,
+                         const Dims<4>&);
+  KernelType effective_kernel_type;
+  // TODO(suharshs): Currently only the reference implementation supports
+  // dilations.
+  if ((params->dilation_width_factor != 1) ||
+      (params->dilation_height_factor != 1)) {
+    effective_kernel_type = kReference;
+  } else {
+    effective_kernel_type = kernel_type;
+  }
+
+  if (effective_kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
@@ -188,7 +204,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       GetTensorData(input), GetTensorDims(input),
       GetTensorData(filter), GetTensorDims(filter),
       GetTensorData(bias), GetTensorDims(bias), params->stride_width,
-      params->stride_height, data->padding.width, data->padding.height,
+      params->stride_height, params->dilation_width_factor,
+      params->dilation_height_factor, data->padding.width, data->padding.height,
       params->depth_multiplier, output_activation_min, output_activation_max,
       GetTensorData(output), GetTensorDims(output));
 }
@@ -204,9 +221,20 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 
   void (*depthwise_conv)(const uint8*, const Dims<4>&, int32, const uint8*,
                          const Dims<4>&, int32, const int32*, const Dims<4>&,
-                         int, int, int, int, int, int32, int32, int, int32,
-                         int32, uint8*, const Dims<4>&);
-  if (kernel_type == kReference) {
+                         int, int, int, int, int, int, int, int32, int32, int,
+                         int32, int32, uint8*, const Dims<4>&);
+
+  KernelType effective_kernel_type;
+  // TODO(suharshs): Currently only the reference implementation supports
+  // dilations.
+  if ((params->dilation_width_factor != 1) ||
+      (params->dilation_height_factor != 1)) {
+    effective_kernel_type = kReference;
+  } else {
+    effective_kernel_type = kernel_type;
+  }
+
+  if (effective_kernel_type == kReference) {
     depthwise_conv = &reference_ops::DepthwiseConv;
   } else {
     depthwise_conv = &optimized_ops::DepthwiseConv;
@@ -216,7 +244,8 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       GetTensorData(input), GetTensorDims(input), input_offset,
       GetTensorData(filter), GetTensorDims(filter), filter_offset,
       GetTensorData(bias), GetTensorDims(bias), params->stride_width,
-      params->stride_height, data->padding.width, data->padding.height,
+      params->stride_height, params->dilation_width_factor,
+      params->dilation_height_factor, data->padding.width, data->padding.height,
       params->depth_multiplier, output_offset, data->output_multiplier,
       data->output_shift, data->output_activation_min,
       data->output_activation_max, GetTensorData(output),
diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
index c00cafb9fb..2af26ab80a 100644
--- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
+++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc
@@ -30,7 +30,8 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
   // stride values.
   BaseDepthwiseConvolutionOpModel(const TensorData& input,
                                   const TensorData& filter,
-                                  const TensorData& output) {
+                                  const TensorData& output,
+                                  int dilation_factor = 1) {
     input_ = AddInput(input);
     filter_ = AddInput(filter);
 
@@ -56,7 +57,8 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel {
         BuiltinOperator_DEPTHWISE_CONV_2D,
         BuiltinOptions_DepthwiseConv2DOptions,
         CreateDepthwiseConv2DOptions(builder_, Padding_VALID, 1, 1, depth_mul,
-                                     ActivationFunctionType_NONE)
+                                     ActivationFunctionType_NONE,
+                                     dilation_factor, dilation_factor)
             .Union());
 
     BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
@@ -110,6 +112,58 @@ TEST(DepthwiseConvolutionOpTest, SimpleTest) {
                              }));
 }
 
+TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) {
+  const int depth = 1;
+  const int image_width = 9;
+  const int image_height = 9;
+  const int image_batch_count = 1;
+  const int filter_size = 3;
+  const int filter_count = 1;
+  const int dilation_factor = 3;
+  DepthwiseConvolutionOpModel m(
+      {TensorType_FLOAT32,
+       {image_batch_count, image_height, image_width, depth}},
+      {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}},
+      {TensorType_FLOAT32, {}}, dilation_factor);
+
+  // The image matrix is:
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // clang-format off
+  m.SetInput({0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0});
+  // clang-format on
+  // The filter matrix is:
+  // | 1 | 2 | 3 |
+  // | 4 | 5 | 6 |
+  // | 7 | 8 | 9 |
+  m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
+  // No bias for this test.
+  m.SetBias({0});
+  m.Invoke();
+
+  // With a dilation rate of 3 the 3x3 filter spans 7x7, so the 9x9 input
+  // yields a 3x3 output (VALID padding) of all 5s. Specifically:
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
+}
+
 class QuantizedDepthwiseConvolutionOpModel
     : public BaseDepthwiseConvolutionOpModel {
  public:
@@ -207,6 +261,64 @@ TEST(QuantizedDepthwiseConvolutionOpTest,
               ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1)));
 }
 
+TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) {
+  const int depth = 1;
+  const int image_width = 9;
+  const int image_height = 9;
+  const int image_batch_count = 1;
+  const int filter_size = 3;
+  const int filter_count = 1;
+  const int dilation_factor = 3;
+  QuantizedDepthwiseConvolutionOpModel m(
+      {TensorType_UINT8,
+       {image_batch_count, image_height, image_width, depth},
+       0,
+       255},
+      {TensorType_UINT8,
+       {depth, filter_size, filter_size, filter_count},
+       0,
+       255},
+      {TensorType_UINT8, {}, 0, 255}, dilation_factor);
+
+  // The image matrix is:
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+  // clang-format off
+  m.SetInput({0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 1, 1, 1, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0,
+              0, 0, 0, 0, 0, 0, 0, 0, 0});
+  // clang-format on
+  // The filter matrix is:
+  // | 1 | 2 | 3 |
+  // | 4 | 5 | 6 |
+  // | 7 | 8 | 9 |
+  m.SetFilter({1, 2, 3, 4, 5, 6, 7, 8, 9});
+  // No bias for this test.
+  m.SetBias({0});
+  m.Invoke();
+
+  // With a dilation rate of 3 the 3x3 filter spans 7x7, so the 9x9 input
+  // yields a 3x3 output (VALID padding) of all 5s. Specifically:
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  // | 5 | 5 | 5 |
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
index 7f6eea2d5d..70810ca784 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -1067,6 +1067,26 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  // TODO(suharshs): Optimized implementation of dilation depthwise conv needs
+  // to be implemented.
+  TFLITE_DCHECK(dilation_width_factor == 1);
+  TFLITE_DCHECK(dilation_height_factor == 1);
+
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, pad_width, pad_height,
+                depth_multiplier, output_activation_min, output_activation_max,
+                output_data, output_dims);
+}
+
 // legacy, for compatibility with old checked-in code
 template 
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
index 3fd00c8930..f707279600 100644
--- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1964,6 +1964,30 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  // TODO(suharshs): Optimized implementation of dilation depthwise is not
+  // supported yet.
+  TFLITE_DCHECK(dilation_width_factor == 1);
+  TFLITE_DCHECK(dilation_height_factor == 1);
+
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
 // Legacy, for compatibility with old checked-in code.
 template 
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
index 9aabee5000..bb5d590775 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h
@@ -25,8 +25,9 @@ namespace reference_ops {
 inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
                           const float* filter_data, const Dims<4>& filter_dims,
                           const float* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
                           float output_activation_min,
                           float output_activation_max, float* output_data,
                           const Dims<4>& output_dims) {
@@ -52,8 +53,9 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
             float total = 0.f;
             for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
               for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + filter_x;
-                const int in_y = in_y_origin + filter_y;
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
                 // If the location is outside the bounds of the input image,
                 // use zero as a default value.
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
@@ -81,6 +83,20 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          float output_activation_min,
+                          float output_activation_max, float* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data,
+                bias_dims, stride_width, stride_height, 1, 1, pad_width,
+                pad_height, depth_multiplier, output_activation_min,
+                output_activation_max, output_data, output_dims);
+}
+
 // Legacy, for compatibility with old checked-in code.
 template 
 void DepthwiseConv(const float* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
index d57739279f..5e3e8997fc 100644
--- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -30,8 +30,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
                           int32 input_offset, const uint8* filter_data,
                           const Dims<4>& filter_dims, int32 filter_offset,
                           const int32* bias_data, const Dims<4>& bias_dims,
-                          int stride_width, int stride_height, int pad_width,
-                          int pad_height, int depth_multiplier,
+                          int stride_width, int stride_height,
+                          int dilation_width_factor, int dilation_height_factor,
+                          int pad_width, int pad_height, int depth_multiplier,
                           int32 output_offset, int32 output_multiplier,
                           int output_shift, int32 output_activation_min,
                           int32 output_activation_max, uint8* output_data,
@@ -58,8 +59,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
             int32 acc = 0;
             for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
               for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-                const int in_x = in_x_origin + filter_x;
-                const int in_y = in_y_origin + filter_y;
+                const int in_x = in_x_origin + dilation_width_factor * filter_x;
+                const int in_y =
+                    in_y_origin + dilation_height_factor * filter_y;
                 // If the location is outside the bounds of the input image,
                 // use zero as a default value.
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
@@ -90,6 +92,24 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
   }
 }
 
+inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
+                          int32 input_offset, const uint8* filter_data,
+                          const Dims<4>& filter_dims, int32 filter_offset,
+                          const int32* bias_data, const Dims<4>& bias_dims,
+                          int stride_width, int stride_height, int pad_width,
+                          int pad_height, int depth_multiplier,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_shift, int32 output_activation_min,
+                          int32 output_activation_max, uint8* output_data,
+                          const Dims<4>& output_dims) {
+  DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims,
+                filter_offset, bias_data, bias_dims, stride_width,
+                stride_height, 1, 1, pad_width, pad_height, depth_multiplier,
+                output_offset, output_multiplier, output_shift,
+                output_activation_min, output_activation_max, output_data,
+                output_dims);
+}
+
 // Legacy, for compatibility with old checked-in code.
 template 
 void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims,
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index d5da4fcccf..f0db22d581 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -276,11 +276,15 @@ table Pool2DOptions {
 }
 
 table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
   padding:Padding;
   stride_w:int;
   stride_h:int;
   depth_multiplier:int;
   fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
 }
 
 table ConcatEmbeddingsOptions {
diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h
index 0b9c57480e..8c086a5e67 100755
--- a/tensorflow/contrib/lite/schema/schema_generated.h
+++ b/tensorflow/contrib/lite/schema/schema_generated.h
@@ -2339,12 +2339,16 @@ struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {
   int32_t stride_h;
   int32_t depth_multiplier;
   ActivationFunctionType fused_activation_function;
+  int32_t dilation_w_factor;
+  int32_t dilation_h_factor;
   DepthwiseConv2DOptionsT()
       : padding(Padding_SAME),
         stride_w(0),
         stride_h(0),
         depth_multiplier(0),
-        fused_activation_function(ActivationFunctionType_NONE) {
+        fused_activation_function(ActivationFunctionType_NONE),
+        dilation_w_factor(1),
+        dilation_h_factor(1) {
   }
 };
 
@@ -2355,7 +2359,9 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
     VT_STRIDE_W = 6,
     VT_STRIDE_H = 8,
     VT_DEPTH_MULTIPLIER = 10,
-    VT_FUSED_ACTIVATION_FUNCTION = 12
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_W_FACTOR = 14,
+    VT_DILATION_H_FACTOR = 16
   };
   Padding padding() const {
     return static_cast(GetField(VT_PADDING, 0));
@@ -2372,6 +2378,12 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
   ActivationFunctionType fused_activation_function() const {
     return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  int32_t dilation_w_factor() const {
+    return GetField(VT_DILATION_W_FACTOR, 1);
+  }
+  int32_t dilation_h_factor() const {
+    return GetField(VT_DILATION_H_FACTOR, 1);
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField(verifier, VT_PADDING) &&
@@ -2379,6 +2391,8 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
            VerifyField(verifier, VT_STRIDE_H) &&
            VerifyField(verifier, VT_DEPTH_MULTIPLIER) &&
            VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField(verifier, VT_DILATION_W_FACTOR) &&
+           VerifyField(verifier, VT_DILATION_H_FACTOR) &&
            verifier.EndTable();
   }
   DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -2404,6 +2418,12 @@ struct DepthwiseConv2DOptionsBuilder {
   void add_fused_activation_function(ActivationFunctionType fused_activation_function) {
     fbb_.AddElement(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0);
   }
+  void add_dilation_w_factor(int32_t dilation_w_factor) {
+    fbb_.AddElement(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor) {
+    fbb_.AddElement(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
   explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -2422,8 +2442,12 @@ inline flatbuffers::Offset CreateDepthwiseConv2DOptions(
     int32_t stride_w = 0,
     int32_t stride_h = 0,
     int32_t depth_multiplier = 0,
-    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) {
+    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+    int32_t dilation_w_factor = 1,
+    int32_t dilation_h_factor = 1) {
   DepthwiseConv2DOptionsBuilder builder_(_fbb);
+  builder_.add_dilation_h_factor(dilation_h_factor);
+  builder_.add_dilation_w_factor(dilation_w_factor);
   builder_.add_depth_multiplier(depth_multiplier);
   builder_.add_stride_h(stride_h);
   builder_.add_stride_w(stride_w);
@@ -7064,6 +7088,8 @@ inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const
   { auto _e = stride_h(); _o->stride_h = _e; };
   { auto _e = depth_multiplier(); _o->depth_multiplier = _e; };
   { auto _e = fused_activation_function(); _o->fused_activation_function = _e; };
+  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; };
+  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; };
 }
 
 inline flatbuffers::Offset DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
@@ -7079,13 +7105,17 @@ inline flatbuffers::Offset CreateDepthwiseConv2DOptions(
   auto _stride_h = _o->stride_h;
   auto _depth_multiplier = _o->depth_multiplier;
   auto _fused_activation_function = _o->fused_activation_function;
+  auto _dilation_w_factor = _o->dilation_w_factor;
+  auto _dilation_h_factor = _o->dilation_h_factor;
   return tflite::CreateDepthwiseConv2DOptions(
       _fbb,
       _padding,
       _stride_w,
       _stride_h,
       _depth_multiplier,
-      _fused_activation_function);
+      _fused_activation_function,
+      _dilation_w_factor,
+      _dilation_h_factor);
 }
 
 inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 5d0895c72f..3754b58b23 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -1434,6 +1434,7 @@ def make_depthwiseconv_tests(zip_path):
           "input_shape": [[1, 3, 4, 3], [1, 10, 10, 3]],
           "filter_size": [[1, 1], [1, 2], [3, 3]],
           "strides": [[1, 1, 1, 1], [1, 3, 3, 1]],
+          "dilations": [[1, 1, 1, 1], [1, 3, 2, 1], [1, 2, 2, 1]],
           "channel_multiplier": [1, 2],
           "rate": [[1, 1]],
           "padding": ["SAME", "VALID"],
@@ -1444,6 +1445,7 @@ def make_depthwiseconv_tests(zip_path):
           "input_shape": [[1, 3, 4, 3]],
           "filter_size": [[1, 1]],
           "strides": [[1, 1, 2, 1]],  # TF needs [1, x, x, 1]
+          "dilations": [[1, 1, 1, 1], [1, 2, 2, 1]],
           "channel_multiplier": [2],
           "rate": [[2, 2]],  #  Only [1, 1] is supported
           "padding": ["SAME"],
diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h
index 2e100e37f6..164b70f2df 100644
--- a/tensorflow/contrib/lite/toco/model.h
+++ b/tensorflow/contrib/lite/toco/model.h
@@ -477,6 +477,11 @@ struct DepthwiseConvOperator : Operator {
   int stride_height = 0;
   int stride_width = 0;
   int depth_multiplier = 0;
+  // A dilation factor of 0 is invalid, and these fields are optional
+  // attributes, so they are initialized to 1 to give the default
+  // (non-dilated) conv behavior when the attribute is not present.
+  int dilation_width_factor = 1;
+  int dilation_height_factor = 1;
 };
 
 // Depth-to-space transform operator.
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc
index 5486012176..1061e7c7c4 100644
--- a/tensorflow/contrib/lite/toco/tflite/operator.cc
+++ b/tensorflow/contrib/lite/toco/tflite/operator.cc
@@ -107,7 +107,8 @@ class DepthwiseConvolution
         ActivationFunction::Serialize(op.fused_activation_function);
     return ::tflite::CreateDepthwiseConv2DOptions(
         *builder, padding, op.stride_width, op.stride_height,
-        op.depth_multiplier, activation_function);
+        op.depth_multiplier, activation_function, op.dilation_width_factor,
+        op.dilation_height_factor);
   }
 
   void ReadOptions(const TfLiteOptions& options,
@@ -118,9 +119,18 @@ class DepthwiseConvolution
     op->depth_multiplier = options.depth_multiplier();
     op->fused_activation_function =
         ActivationFunction::Deserialize(options.fused_activation_function());
+    op->dilation_width_factor = options.dilation_w_factor();
+    op->dilation_height_factor = options.dilation_h_factor();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const Operator& op) const override {
+    const auto& conv_op = static_cast(op);
+    if (conv_op.dilation_width_factor != 1 ||
+        conv_op.dilation_height_factor != 1) {
+      return 2;
+    }
+    return 1;
+  }
 };
 
 class Add : public BuiltinOperator
Date: Thu, 13 Sep 2018 15:15:23 -0700
Subject: [PATCH 0168/1357] Fix parallel_gpu_execute.sh script on windows.

PiperOrigin-RevId: 212887532
---
 .../gpu_build/parallel_gpu_execute.sh         | 26 +++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
index 48b3989d86..03a2a07fb1 100755
--- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
+++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh
@@ -31,6 +31,28 @@ TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU:-4}
 # future and to use a rounder number, we set it to 1G.
 export TF_PER_DEVICE_MEMORY_LIMIT_MB=1024
 
+# *******************************************************************
+#         This section of the script is needed to
+#         make things work on windows under msys.
+# *******************************************************************
+RUNFILES_MANIFEST_FILE="${TEST_SRCDIR}/MANIFEST"
+function rlocation() {
+  if is_absolute "$1" ; then
+    # If the file path is already fully specified, simply return it.
+    echo "$1"
+  elif [[ -e "$TEST_SRCDIR/$1" ]]; then
+    # If the file exists in the $TEST_SRCDIR then just use it.
+    echo "$TEST_SRCDIR/$1"
+  elif [[ -e "$RUNFILES_MANIFEST_FILE" ]]; then
+    # If a runfiles manifest file exists then use it.
+    echo "$(grep "^$1 " "$RUNFILES_MANIFEST_FILE" | sed 's/[^ ]* //')"
+  fi
+}
+
+TEST_BINARY="$(rlocation $TEST_WORKSPACE/${1#./})"
+shift
+# *******************************************************************
+
 mkdir -p /var/lock
 # Try to acquire any of the TF_GPU_COUNT * TF_TESTS_PER_GPU
 # slots to run a test at.
@@ -46,8 +68,8 @@ for j in `seq 0 $((TF_TESTS_PER_GPU-1))`; do
         # This export only works within the brackets, so it is isolated to one
         # single command.
         export CUDA_VISIBLE_DEVICES=$i
-        echo "Running test $@ on GPU $CUDA_VISIBLE_DEVICES"
-        $@
+        echo "Running test $TEST_BINARY $* on GPU $CUDA_VISIBLE_DEVICES"
+        "$TEST_BINARY" $@
       )
       return_code=$?
       flock -u "$lock_fd"
-- 
GitLab


From ea52ecd836098e0b1d37325cf1b91133f908547e Mon Sep 17 00:00:00 2001
From: Mark Heffernan 
Date: Thu, 13 Sep 2018 15:27:12 -0700
Subject: [PATCH 0169/1357] Fix bug in kSlice implementation in evaluator.
 Slice was producing a literal with a default layout rather than the layout of
 the slice HLO instruction. This resulted in errors when the produced literal
 was consumed by later operations.

PiperOrigin-RevId: 212889334
---
 .../compiler/xla/service/hlo_evaluator.cc     |  6 ++++++
 .../xla/service/hlo_evaluator_test.cc         | 19 +++++++++++++++++++
 .../xla/service/hlo_evaluator_typed_visitor.h | 16 +++-------------
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 064b86493d..06b6d5b559 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1339,6 +1339,12 @@ Status HloEvaluator::Preprocess(HloInstruction* hlo) {
 Status HloEvaluator::Postprocess(HloInstruction* hlo) {
   VLOG(2) << "Finished visiting " << hlo->ToString()
           << "; evaluated value is: " << GetEvaluatedLiteralFor(hlo).ToString();
+  // Out of convenience the literal may have been produced with a different
+  // layout. Relayout as indicated by the HLO instruction.
+  if (!LayoutUtil::LayoutsInShapesEqual(GetEvaluatedLiteralFor(hlo).shape(),
+                                        hlo->shape())) {
+    evaluated_.at(hlo) = evaluated_.at(hlo).Relayout(hlo->shape());
+  }
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 16411eb078..01e88566a5 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -2570,6 +2570,25 @@ ENTRY main {
   EXPECT_TRUE(LiteralTestUtil::Equal(expected, Evaluate({&arg})));
 }
 
+TEST_P(HloEvaluatorTest, SliceWithDifferentLayout) {
+  // Regression test for b/114735354.
+  const string hlo_text = R"(
+HloModule SliceWithDifferentLayout
+
+ENTRY main {
+  arg = f32[2,2,2]{0,1,2} parameter(0)
+  ROOT %slice = f32[2,2,2]{1,0,2} slice(f32[2,2,2]{0,1,2} %arg), slice={[0:2], [0:2], [0:2]}
+}
+)";
+  ParseAndVerifyModule(hlo_text);
+
+  Literal arg = LiteralUtil::CreateR3WithLayout(
+      {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}},
+      LayoutUtil::MakeLayout({0, 1, 2}));
+  Literal actual = Evaluate({&arg});
+  EXPECT_TRUE(LiteralTestUtil::Equal(arg, actual));
+}
+
 INSTANTIATE_TEST_CASE_P(HloEvaluatorTest_Instantiation, HloEvaluatorTest,
                         ::testing::ValuesIn(use_bf16_params));
 
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 7f090a52db..8fb17a0033 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -249,12 +249,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     TF_ASSIGN_OR_RETURN(Literal result,
                         parent_->GetEvaluatedLiteralFor(operand).Convert(
                             convert->shape().element_type()));
-
-    if (LayoutUtil::LayoutsInShapesEqual(result.shape(), convert->shape())) {
-      parent_->evaluated_[convert] = std::move(result);
-    } else {
-      parent_->evaluated_[convert] = result.Relayout(convert->shape().layout());
-    }
+    parent_->evaluated_[convert] = std::move(result);
     return Status::OK();
   }
 
@@ -265,11 +260,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
                         parent_->GetEvaluatedLiteralFor(operand).BitcastConvert(
                             convert->shape().element_type()));
 
-    if (LayoutUtil::LayoutsInShapesEqual(result.shape(), convert->shape())) {
-      parent_->evaluated_[convert] = std::move(result);
-    } else {
-      parent_->evaluated_[convert] = result.Relayout(convert->shape().layout());
-    }
+    parent_->evaluated_[convert] = std::move(result);
     return Status::OK();
   }
 
@@ -2350,8 +2341,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       return operand_literal.Get(operand_index);
     };
 
-    auto result = LiteralUtil::CreateFromDimensions(
-        shape.element_type(), AsInt64Slice(shape.dimensions()));
+    Literal result(shape);
     TF_RETURN_IF_ERROR(result.Populate(func));
     parent_->evaluated_[slice] = std::move(result);
     return Status::OK();
-- 
GitLab


From e59ddcca727340a8b45694a28cd9f52352607e63 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar 
Date: Thu, 13 Sep 2018 15:34:43 -0700
Subject: [PATCH 0170/1357] Automated rollback of commit
 6b507a6de855a6f988100904229b7f46a5652b88

PiperOrigin-RevId: 212890622
---
 tensorflow/contrib/lite/toco/BUILD            |  1 -
 .../contrib/lite/toco/import_tensorflow.cc    | 18 -----
 .../lite/toco/import_tensorflow_test.cc       | 75 ++-----------------
 3 files changed, 5 insertions(+), 89 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index 72c71b2841..bea90f1ce8 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -331,7 +331,6 @@ cc_library(
         "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
     ] + select({
         # Placeholder for internal darwin rule.
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index eb36b3411d..9bc23c4b3c 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -58,7 +58,6 @@ using tensorflow::DT_STRING;
 using tensorflow::DT_UINT8;
 using tensorflow::GraphDef;
 using tensorflow::NodeDef;
-using tensorflow::OpRegistry;
 using tensorflow::TensorProto;
 using tensorflow::TensorShapeProto;
 
@@ -1080,23 +1079,6 @@ tensorflow::Status ConvertUnsupportedOperator(
   } else if (HasAttr(node, "Tout")) {
     const auto& output_type = GetDataTypeAttr(node, "Tout");
     op->output_data_types.push_back(ConvertDataType(output_type));
-  } else {
-    const tensorflow::OpDef* op_def = nullptr;
-    if (OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
-      for (const auto& output_arg : op_def->output_arg()) {
-        if (HasAttr(node, output_arg.type_attr())) {
-          op->output_data_types.push_back(
-              ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
-        } else {
-          LOG(INFO) << "Op node missing output type attribute: " << node.name();
-        }
-      }
-    }
-    if (op->output_data_types.empty()) {
-      // TODO(b/113613439): Figure out how to propagate types for custom ops
-      // that have no OpDef.
-      LOG(INFO) << "Unable to determine output type for op: " << node.op();
-    }
   }
   if (HasAttr(node, kAttrOutputShapes)) {
     const auto& output_shapes = GetListAttr(node, kAttrOutputShapes);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index da248826a7..a00e136dd6 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -49,17 +49,6 @@ Status ImportTensorFlowNode(const NodeDef&, const TensorFlowImportFlags&,
 
 namespace {
 
-Status ImportNode(const NodeDef& node, Model* model) {
-  const auto converter = internal::GetTensorFlowNodeConverterMap();
-  return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model,
-                                        converter);
-}
-
-Status ImportNode(const NodeDef& node) {
-  Model model;
-  return ImportNode(node, &model);
-}
-
 class ShapeImportTest : public ::testing::TestWithParam {
  protected:
   ShapeImportTest() {}
@@ -120,24 +109,12 @@ class ShapeImportTest : public ::testing::TestWithParam {
     SetAttrValue(t, &value_attr);
     (*node->mutable_attr())["value"] = value_attr;
   }
-};
-
-class TypeImportTest : public ::testing::TestWithParam<
-                           std::pair> {
- protected:
-  TypeImportTest() {}
-
-  void BuildUnaryNode(const std::string& op_name, tensorflow::DataType dtype,
-                      NodeDef* node) {
-    node->set_op(op_name);
-    node->set_name("Node1");
-
-    node->add_input();
-    node->set_input(0, "Node0");
 
-    AttrValue dtype_attr;
-    SetAttrValue(dtype, &dtype_attr);
-    (*node->mutable_attr())["T"] = dtype_attr;
+  Status ImportNode(const NodeDef& node) {
+    Model model;
+    const auto converter = internal::GetTensorFlowNodeConverterMap();
+    return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), &model,
+                                          converter);
   }
 };
 
@@ -190,47 +167,5 @@ TEST_P(ShapeImportTest, ValidShapeButZeroElements) {
 INSTANTIATE_TEST_CASE_P(ValidShapeButZeroElements, ShapeImportTest,
                         ::testing::ValuesIn(TestTypes()));
 
-std::vector> UnaryTestTypes() {
-  return {{DT_FLOAT, ArrayDataType::kFloat},
-          {DT_INT32, ArrayDataType::kInt32},
-          {DT_INT64, ArrayDataType::kInt64}};
-}
-
-TEST_P(TypeImportTest, BasicTypeInference) {
-  NodeDef node;
-  BuildUnaryNode("Atan", GetParam().first, &node);
-
-  Model model;
-  EXPECT_TRUE(ImportNode(node, &model).ok());
-
-  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
-  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
-  const TensorFlowUnsupportedOperator* op =
-      static_cast(
-          model.operators[0].get());
-  ASSERT_THAT(op->output_data_types, ::testing::ElementsAre(GetParam().second));
-}
-INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest,
-                        ::testing::ValuesIn(UnaryTestTypes()));
-
-TEST(ImportTest, FailedTypeInference) {
-  // Create a unary op with no Type ("T") annotation.
-  NodeDef node;
-  node.set_op("Atan");
-  node.set_name("Node1");
-  node.add_input();
-  node.set_input(0, "Node0");
-
-  Model model;
-  EXPECT_TRUE(ImportNode(node, &model).ok());
-
-  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
-  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
-  const TensorFlowUnsupportedOperator* op =
-      static_cast(
-          model.operators[0].get());
-  ASSERT_TRUE(op->output_data_types.empty());
-}
-
 }  // namespace
 }  // namespace toco
-- 
GitLab


From ec3f08e28f77309860fe7430a4567407bc26c5df Mon Sep 17 00:00:00 2001
From: Anna R 
Date: Thu, 13 Sep 2018 15:47:36 -0700
Subject: [PATCH 0171/1357] Fixing error output in api_compatibility_test.py.
 Looks like it should be self.maxDiff instead of self.maxDiffs: "Diff is 2708
 characters long. Set self.maxDiff to None to see it."

PiperOrigin-RevId: 212892831
---
 tensorflow/tools/api/tests/api_compatibility_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index 99bed5714f..d06c7f2d49 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -174,7 +174,7 @@ class ApiCompatibilityTest(test.TestCase):
         verbose_diff_message = diff_message
       else:
         # Do not truncate diff
-        self.maxDiffs = None  # pylint: disable=invalid-name
+        self.maxDiff = None  # pylint: disable=invalid-name
         # Now we can run an actual proto diff.
         try:
           self.assertProtoEquals(expected_dict[key], actual_dict[key])
-- 
GitLab


From 133a9ef4cb05e4a1a2122bdb5176e2954139c3c3 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy 
Date: Thu, 13 Sep 2018 15:47:40 -0700
Subject: [PATCH 0172/1357] Put a deprecation notice in cmake readme.

PiperOrigin-RevId: 212892844
---
 tensorflow/contrib/cmake/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index 0b79f718d4..789dab81ed 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -1,6 +1,10 @@
 TensorFlow CMake build
 ======================
 
+The CMake build is deprecated for TensorFlow. Please use `bazel` to build
+TensorFlow on all platforms. For details, see the
+[TensorFlow install guide](https://www.tensorflow.org/install/).
+
 This directory contains CMake files for building TensorFlow on Microsoft
 Windows. [CMake](https://cmake.org) is a cross-platform tool that can
 generate build scripts for multiple build systems, including Microsoft
-- 
GitLab


From 4292b8107175b3c3223f65c75b3ca091bd0604ec Mon Sep 17 00:00:00 2001
From: Sanjoy Das 
Date: Thu, 13 Sep 2018 15:48:52 -0700
Subject: [PATCH 0173/1357] [TF:XLA] Bump open source abseil revision to
 8ff1374008259719b54a8cb128ef951c02da164c

PiperOrigin-RevId: 212893036
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 65314a4a06..25698da1c9 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -106,11 +106,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/02451914b9ad5320f81f56a89f3eef1f8683227c.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/02451914b9ad5320f81f56a89f3eef1f8683227c.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/8ff1374008259719b54a8cb128ef951c02da164c.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/8ff1374008259719b54a8cb128ef951c02da164c.tar.gz",
         ],
-        sha256 = "345fa25136484a9e5d918880d66ee577a9cb24377f8978d4e5a6c543706a1011",
-        strip_prefix = "abseil-cpp-02451914b9ad5320f81f56a89f3eef1f8683227c",
+        sha256 = "006931f9705484041eed65189038f87931a87cff200bb296f94b3d42339c4cd9",
+        strip_prefix = "abseil-cpp-8ff1374008259719b54a8cb128ef951c02da164c",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
     )
 
-- 
GitLab


From 29b56bde1e28e558111b917fd44b973e2aea7fcf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 16:08:12 -0700
Subject: [PATCH 0174/1357] Automated rollback of commit
 ac60b46e2c5962fd8099a4406c1788d826ad3c0d

PiperOrigin-RevId: 212896336
---
 tensorflow/compiler/jit/BUILD                 |   6 +
 .../jit/encapsulate_subgraphs_pass.cc         |  17 +
 .../compiler/jit/encapsulate_subgraphs_pass.h |   6 +
 .../jit/encapsulate_xla_computations_pass.cc  | 360 ++++++++++++++++++
 .../jit/encapsulate_xla_computations_pass.h   |  60 +++
 .../encapsulate_xla_computations_pass_test.cc | 346 +++++++++++++++++
 .../jit/jit_compilation_pass_registration.cc  |   9 +-
 tensorflow/compiler/jit/ops/xla_ops.cc        |  19 +
 tensorflow/compiler/tf2xla/BUILD              |   1 +
 tensorflow/compiler/tf2xla/test_util.cc       |   8 +
 tensorflow/compiler/tf2xla/test_util.h        |  16 +
 .../common_runtime/graph_execution_state.cc   |   4 +
 .../grappler/optimizers/meta_optimizer.cc     |  23 ++
 13 files changed, 874 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
 create mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass.h
 create mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 7d5db713f6..f4e1bc5e83 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -363,6 +363,7 @@ cc_library(
         "deadness_analysis.cc",
         "deadness_analysis_internal.h",
         "encapsulate_subgraphs_pass.cc",
+        "encapsulate_xla_computations_pass.cc",
         "mark_for_compilation_pass.cc",
         "mark_for_compilation_pass_test_helper.cc",
         "partially_decluster_pass.cc",
@@ -371,6 +372,7 @@ cc_library(
         "build_xla_launch_ops_pass.h",
         "deadness_analysis.h",
         "encapsulate_subgraphs_pass.h",
+        "encapsulate_xla_computations_pass.h",
         "mark_for_compilation_pass.h",
         "mark_for_compilation_pass_test_helper.h",
         "partially_decluster_pass.h",
@@ -397,6 +399,7 @@ cc_library(
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/kernels:bounds_check",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -475,6 +478,7 @@ tf_cc_test(
     size = "small",
     srcs = [
         "encapsulate_subgraphs_pass_test.cc",
+        "encapsulate_xla_computations_pass_test.cc",
         "mark_for_compilation_pass_test.cc",
         "partially_decluster_pass_test.cc",
     ],
@@ -490,7 +494,9 @@ tf_cc_test(
         "//tensorflow/cc:resource_variable_ops",
         "//tensorflow/cc:sendrecv_ops",
         "//tensorflow/compiler/jit/kernels:xla_launch_op",
+        "//tensorflow/compiler/tf2xla:test_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
+        "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index ae7a22f451..e0632ff7e4 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include 
 #include 
 
+#include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
@@ -58,6 +59,22 @@ const char* const kXlaNumResourceArgsAttr = "_XlaNumResourceArgs";
 const char* const kXlaHostTransferSequencerAttr =
     "_xla_host_transfer_sequencer";
 
+void SortControlInputs(GraphDef* gdef) {
+  int64 num_nodes = gdef->node_size();
+  for (int64 i = 0; i < num_nodes; ++i) {
+    NodeDef* node = gdef->mutable_node(i);
+    // Data inputs stay first in original order; control inputs sort by name.
+    std::stable_sort(node->mutable_input()->begin(),
+                     node->mutable_input()->end(),
+                     [](const string& a, const string& b) {
+                       bool a_is_control = absl::StartsWith(a, "^");
+                       bool b_is_control = absl::StartsWith(b, "^");
+                       return (!a_is_control && b_is_control) ||
+                              (a_is_control && b_is_control && a < b);
+                     });
+  }
+}
+
 namespace {
 
 bool AreAllParentsGuaranteedConst(
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
index 926589546f..90354a801a 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h
@@ -102,6 +102,12 @@ extern const char* const kXlaNumConstantArgsAttr;
 // Name of the attribute containing the number of resource variable arguments.
 extern const char* const kXlaNumResourceArgsAttr;
 
+// Sorts each node's control inputs by their names. This guarantees that for two
+// structurally equivalent GraphDefs, we get the same traversal ordering on
+// node's control input fields.
+// TODO(hpucha): Move the utilities to a more appropriate place.
+void SortControlInputs(GraphDef* gdef);
+
 class EncapsulateSubgraphsPass : public GraphOptimizationPass {
  public:
   Status Run(const GraphOptimizationPassOptions& options) override;
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
new file mode 100644
index 0000000000..97ef8cd3cb
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -0,0 +1,360 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
+
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/dump_graph.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/lib/strings/proto_serialization.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/fingerprint.h"
+
+namespace tensorflow {
+
+const char* const EncapsulateXlaComputationsPass::kXlaClusterAttr =
+    "_xla_compile_id";
+
+namespace {
+
+const char* const kXlaClusterOutput = "XlaClusterOutput";
+
+// Returns the node's `_is_guaranteed_constant` attr, or false if unreadable.
+bool is_guaranteed_constant(const Node& n) {
+  bool guaranteed_constant = false;
+  if (!GetNodeAttr(n.attrs(), "_is_guaranteed_constant", &guaranteed_constant)
+           .ok()) {
+    return false;
+  }
+  return guaranteed_constant;
+}
+
+// Reads `n`'s "index" attr into `*index`; fails unless it is in [0, num_args).
+Status GetIndexAttr(const Node& n, int num_args, int* index) {
+  TF_RETURN_IF_ERROR(GetNodeAttr(n.attrs(), "index", index));
+  if (*index < 0 || *index >= num_args) {
+    return errors::InvalidArgument("Invalid ", n.type_string(), " number ",
+                                   *index);
+  }
+  return Status::OK();
+}
+
+// Returns the type of the destination node's input that `edge` feeds.
+DataType EdgeType(const Edge* edge) {
+  return edge->dst()->input_type(edge->dst_input());
+}
+
+// Adds the source node of every control edge into `node` to `*deps`.
+void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+  for (const Edge* edge : node.in_edges()) {
+    if (edge->IsControlEdge()) {
+      deps->insert(edge->src());
+    }
+  }
+}
+
+// Adds the destination node of every control edge out of `node` to `*deps`.
+void AddControlOutputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+  for (const Edge* edge : node.out_edges()) {
+    if (edge->IsControlEdge()) {
+      deps->insert(edge->dst());
+    }
+  }
+}
+
+// Rewrite function to be passed to EncapsulateSubgraphsInFunctions that sorts
+// the arguments into the order expected by XlaLaunch computations:
+// 1) arguments
+// 2) resource variable arguments
+// See the documentation of EncapsulateSubgraphsInFunctions for the meaning
+// of the arguments.
+//
+// TODO(b/113166435): Ordering constraints on XlaLaunch op can be relaxed.
+Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
+                       std::unique_ptr<Graph>* graph_ptr,
+                       std::vector<int>* input_permutation,
+                       std::vector<int>* output_permutation,
+                       NodeDef* call_def) {
+  Graph* graph = graph_ptr->get();
+  const int num_args = input_permutation->size();
+  const int num_retvals = output_permutation->size();
+
+  std::vector<Node*> args;
+  std::vector<Node*> retvals;
+  args.reserve(num_args);
+  retvals.reserve(num_retvals);
+  for (Node* n : graph->nodes()) {
+    if (n->type_string() == "_Arg") {
+      // Check if this is a guaranteed constant.
+      if (is_guaranteed_constant(*n)) {
+        return errors::InvalidArgument(
+            "Guaranteed constants are not supported (", n->name(), ")");
+      }
+      args.push_back(n);
+    } else if (n->type_string() == "_Retval") {
+      retvals.push_back(n);
+    }
+  }
+
+  if (std::find(args.begin(), args.end(), nullptr) != args.end()) {
+    return errors::InvalidArgument("Missing or non-consecutive arguments");
+  }
+
+  // Reorders the arguments.
+  std::sort(args.begin(), args.end(), [&](Node* a, Node* b) {
+    // Non-resources appear before resources
+    bool a_is_resource = (a->output_type(0) == DT_RESOURCE);
+    bool b_is_resource = (b->output_type(0) == DT_RESOURCE);
+    // Uses the name as a tiebreaker so the output is deterministic.
+    StringPiece a_name(a->name());
+    StringPiece b_name(b->name());
+    return std::tie(a_is_resource, a_name) < std::tie(b_is_resource, b_name);
+  });
+
+  // Sorts the retvals by name so the order is deterministic.
+  std::sort(retvals.begin(), retvals.end(),
+            [](Node* a, Node* b) { return a->name() < b->name(); });
+
+  // Computes the permutation to produce the correct argument order, and update
+  // the argument indices.
+  int variable_start_index = num_args;
+  for (int i = 0; i < num_args; ++i) {
+    int index;
+    TF_RETURN_IF_ERROR(GetIndexAttr(*args[i], num_args, &index));
+    if (args[i]->output_type(0) == DT_RESOURCE &&
+        variable_start_index == num_args) {
+      variable_start_index = i;
+    }
+    (*input_permutation)[index] = i;
+    args[i]->AddAttr("index", i);
+  }
+  VLOG(4) << "variable_start_index: " << variable_start_index;
+
+  // Computes the permutation to produce the correct retval order, and update
+  // the retval indices.
+  for (int i = 0; i < num_retvals; ++i) {
+    int index;
+    TF_RETURN_IF_ERROR(GetIndexAttr(*retvals[i], num_retvals, &index));
+    (*output_permutation)[index] = i;
+    retvals[i]->AddAttr("index", i);
+  }
+
+  AddNodeAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, call_def->name(),
+              call_def);
+  AddNodeAttr("_variable_start_index", variable_start_index, call_def);
+
+  // Uniquify the function name.
+  GraphDef gdef;
+  graph->ToGraphDef(&gdef);
+
+  // Before serialization, sort each node's control inputs to achieve
+  // determinism. Sorting control inputs could help (but not necessarily) create
+  // a deterministic serialization and fingerprint. Other sources of
+  // nondeterminism include unstable node ordering.
+  SortControlInputs(&gdef);
+  // Fingerprint the function.
+  // Nondeterminism in serialization would not lead to incorrect results, but
+  // may cause spurious cache misses. SerializeToStringDeterministic is a
+  // best-effort deterministic serialization.
+  string serialized;
+  TF_RET_CHECK(SerializeToStringDeterministic(gdef, &serialized));
+  uint64 fingerprint = Fingerprint64(serialized);
+  LOG(INFO) << "Subgraph fingerprint:" << fingerprint;
+  call_def->set_op(absl::StrCat(call_def->op(), "_", fingerprint));
+  return Status::OK();
+}
+
+}  // namespace
+
+/*static*/ Status EncapsulateXlaComputationsPass::Encapsulate(
+    std::unique_ptr<Graph>* graph, FunctionLibraryDefinition* flib_def) {
+  // Check for undeclared outputs before Encapsulation, so we can give a better
+  // error message.
+  // TODO(phawkins): merge this with the encapsulation code to avoid the extra
+  // O(n) pass over the edges.
+  for (const Edge* e : (*graph)->edges()) {
+    if (!e->IsControlEdge() &&
+        e->src()->attrs().Find(kXlaClusterAttr) != nullptr &&
+        e->dst()->attrs().Find(kXlaClusterAttr) == nullptr &&
+        e->dst()->type_string() != kXlaClusterOutput) {
+      return errors::InvalidArgument(
+          "Undeclared output of XLA computation. A common cause of this error "
+          "is variable initializers that depend on the XLA computation. Edge: ",
+          e->src()->name(), ":", e->src_output(), " -> ", e->dst()->name(), ":",
+          e->dst_input());
+    }
+  }
+
+  auto output = absl::make_unique<Graph>((*graph)->op_registry());
+  TF_RETURN_WITH_CONTEXT_IF_ERROR(
+      EncapsulateSubgraphsInFunctions(
+          kXlaClusterAttr, "", **graph, RewriteSubgraph,
+          /*reuse_existing_functions=*/true, &output, flib_def),
+      "EncapsulateXlaComputationsPass failed");
+  graph->swap(output);
+  return Status::OK();
+}
+
+/*static*/ Status EncapsulateXlaComputationsPass::BuildXlaLaunchOps(
+    Graph* graph) {
+  // Finds all of the XlaLaunch function calls, to avoid mutating the graph
+  // while iterating.
+  std::vector<Node*> launch_nodes;
+  for (Node* n : graph->nodes()) {
+    string name;
+    if (GetNodeAttr(n->attrs(), kXlaClusterAttr, &name).ok()) {
+      launch_nodes.push_back(n);
+    }
+  }
+
+  // Replaces each launch function call together with its neighboring
+  // XlaClusterOutput nodes with a XlaLaunch node.
+  for (Node* launch : launch_nodes) {
+    int variable_start_index;
+    TF_RETURN_IF_ERROR(GetNodeAttr(launch->attrs(), "_variable_start_index",
+                                   &variable_start_index));
+
+    std::vector<const Edge*> in_edges;
+    TF_RETURN_IF_ERROR(launch->input_edges(&in_edges));
+
+    const int num_inputs = in_edges.size();
+    const int num_variables = num_inputs - variable_start_index;
+    const int num_args = variable_start_index;
+
+    VLOG(4) << "Launch node '" << launch->name() << "'"
+            << " input edges: " << in_edges.size() << " num_args: " << num_args
+            << " num_variables: " << num_variables;
+
+    std::vector<Node*> nodes_to_remove = {launch};
+
+    // Data and control inputs to the new XlaLaunch node.
+    std::vector<std::pair<Node*, int>> data_inputs(num_inputs);
+    gtl::FlatSet<Node*> control_inputs;
+    DataTypeVector arg_types(num_args);
+
+    AddControlInputs(*launch, &control_inputs);
+
+    for (int i = 0; i < num_args; ++i) {
+      const Edge* edge = in_edges[i];
+      data_inputs[i] = {edge->src(), edge->src_output()};
+      arg_types[i] = EdgeType(edge);
+    }
+
+    // Appends the variable inputs.
+    for (int i = 0; i < num_variables; ++i) {
+      int pos = variable_start_index + i;
+      const Edge* edge = in_edges[pos];
+      data_inputs[pos] = {edge->src(), edge->src_output()};
+    }
+
+    // Outputs.
+    const int num_outputs = launch->output_types().size();
+    gtl::FlatSet<Node*> control_outputs;
+    std::vector<std::vector<std::pair<Node*, int>>> data_outputs(num_outputs);
+    DataTypeVector output_types(num_outputs);
+
+    for (const Edge* le : launch->out_edges()) {
+      if (le->IsControlEdge()) {
+        control_outputs.insert(le->dst());
+      } else {
+        TF_RET_CHECK(le->src_output() < num_outputs);
+        Node* output_node = le->dst();
+
+        TF_RET_CHECK(output_node->type_string() == kXlaClusterOutput)
+            << le->DebugString();
+        nodes_to_remove.push_back(output_node);
+
+        for (const Edge* oe : output_node->out_edges()) {
+          TF_RET_CHECK(!oe->IsControlEdge());
+          data_outputs[le->src_output()].push_back(
+              {oe->dst(), oe->dst_input()});
+        }
+        output_types[le->src_output()] = output_node->input_type(0);
+
+        AddControlOutputs(*output_node, &control_outputs);
+      }
+    }
+
+    NodeDef def;
+    def.set_name(launch->name());
+
+    // Target the XLA CPU/GPU backends.
+    VLOG(2) << "Replacing with XlaLaunch";
+    def.set_op("XlaLaunch");
+    AddNodeAttr("Tconstants", DataTypeVector{}, &def);
+    AddNodeAttr("Targs", arg_types, &def);
+    AddNodeAttr("Nresources", num_variables, &def);
+    AddNodeAttr("Tresults", output_types, &def);
+    NameAttrList function;
+    function.set_name(launch->type_string());
+    AddNodeAttr("function", function, &def);
+
+    for (Node* node : nodes_to_remove) {
+      VLOG(2) << "Deleting node " << node->DebugString();
+      // Ensure that we do not attempt to add control edges to nodes that are
+      // deleted.
+      control_inputs.erase(node);
+      control_outputs.erase(node);
+      graph->RemoveNode(node);
+    }
+
+    Status status;
+    Node* xla_launch = graph->AddNode(def, &status);
+    if (!status.ok()) {
+      return status;
+    }
+    for (int i = 0; i < data_inputs.size(); ++i) {
+      graph->AddEdge(data_inputs[i].first, data_inputs[i].second, xla_launch,
+                     i);
+    }
+    for (Node* n : control_inputs) {
+      graph->AddControlEdge(n, xla_launch);
+    }
+    for (int i = 0; i < data_outputs.size(); ++i) {
+      for (const auto& successor : data_outputs[i]) {
+        graph->AddEdge(xla_launch, i, successor.first, successor.second);
+      }
+    }
+    for (Node* n : control_outputs) {
+      graph->AddControlEdge(xla_launch, n);
+    }
+  }
+  return Status::OK();
+}
+
+Status EncapsulateXlaComputationsPass::Run(
+    const GraphOptimizationPassOptions& options) {
+  VLOG(1) << "EncapsulateXlaComputations(): "
+          << dump_graph::DumpGraphToFile("encapsulate_xla_computations_before",
+                                         **options.graph, options.flib_def);
+
+  TF_RETURN_IF_ERROR(Encapsulate(options.graph, options.flib_def));
+  VLOG(1) << "EncapsulateXlaComputations() half-way: "
+          << dump_graph::DumpGraphToFile("encapsulate_xla_computations_halfway",
+                                         **options.graph, options.flib_def);
+
+  TF_RETURN_IF_ERROR(BuildXlaLaunchOps(options.graph->get()));
+  VLOG(1) << "EncapsulateXlaComputations() finished: "
+          << dump_graph::DumpGraphToFile("encapsulate_xla_computations_after",
+                                         **options.graph, options.flib_def);
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h
new file mode 100644
index 0000000000..99e9dfd598
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h
@@ -0,0 +1,60 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+// Rewrites computations generated by the xla.compile() Python code into
+// XlaLaunch nodes.
+//
+// xla.compile() does two main things:
+// a) marks operators that make up an XLA computation with the attribute
+//    _xla_compile_id=XYZ, where XYZ is a unique key.
+// b) adds XlaClusterOutput nodes to represent outputs of the computation.
+//    These nodes are not marked with the _xla_compile_id attribute.
+
+#ifndef TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_
+#define TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_
+
+#include "tensorflow/core/common_runtime/optimization_registry.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+
+// Encapsulates nodes marked with the _xla_compile_id attribute into
+// XlaLaunch operators.
+class EncapsulateXlaComputationsPass : public GraphOptimizationPass {
+ public:
+  static const char* const kXlaClusterAttr;  // _xla_compile_id
+
+  Status Run(const GraphOptimizationPassOptions& options) override;
+
+  // The following methods are public only for unit tests.
+
+  // This pass has two stages:
+  // a) first, we call EncapsulateSubgraphsPass to encapsulate all nodes
+  //    marked with the same _xla_compile_id attribute into functions. These
+  //    functions contain the computations to be passed to XlaLaunch. During
+  //    encapsulation, we sort the arguments into the order expected by
+  //    XlaLaunch.
+  static Status Encapsulate(std::unique_ptr<Graph>* graph,
+                            FunctionLibraryDefinition* flib_def);
+
+  // b) we rewrite the function calls generated in phase (a) into XlaLaunch
+  //    operators. We also convert the XlaClusterOutput output nodes of the
+  //    function call into the outputs of the XlaLaunch operator.
+  static Status BuildXlaLaunchOps(Graph* graph);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
new file mode 100644
index 0000000000..f643fb0cfe
--- /dev/null
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc
@@ -0,0 +1,346 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
+
+#include "tensorflow/cc/ops/function_ops.h"
+#include "tensorflow/cc/ops/resource_variable_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_op.h"
+#include "tensorflow/compiler/tf2xla/test_util.h"
+#include "tensorflow/core/framework/graph_to_functiondef.h"
+#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/hash/hash.h"
+#include "tensorflow/core/lib/strings/proto_serialization.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/util/equal_graph_def.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+
+static std::unique_ptr<Graph> MakeOuterGraph(
+    const FunctionLibraryDefinition& flib_def, const string& function) {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib_def.ToProto()));
+
+  auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32);
+  auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT);
+  auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32);
+  auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT);
+  auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE);
+  auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
+  auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
+
+  NodeDef def;
+  TF_CHECK_OK(
+      NodeDefBuilder("launch0", function, &flib_def)
+          .Input(a.node()->name(), 0, DT_INT32)
+          .Input(b.node()->name(), 0, DT_FLOAT)
+          .Input(c.node()->name(), 0, DT_INT32)
+          .Input(d.node()->name(), 0, DT_FLOAT)
+          .Input(u.node()->name(), 0, DT_RESOURCE)
+          .Input(v.node()->name(), 0, DT_RESOURCE)
+          .Input(w.node()->name(), 0, DT_RESOURCE)
+          .Attr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0")
+          .Attr("_variable_start_index", 4)
+          .Finalize(&def));
+
+  Status status;
+  Node* launch = scope.graph()->AddNode(def, &status);
+  TF_CHECK_OK(status);
+  TF_CHECK_OK(scope.DoShapeInference(launch));
+  scope.graph()->AddEdge(a.node(), 0, launch, 0);
+  scope.graph()->AddEdge(b.node(), 0, launch, 1);
+  scope.graph()->AddEdge(c.node(), 0, launch, 2);
+  scope.graph()->AddEdge(d.node(), 0, launch, 3);
+  scope.graph()->AddEdge(u.node(), 0, launch, 4);
+  scope.graph()->AddEdge(v.node(), 0, launch, 5);
+  scope.graph()->AddEdge(w.node(), 0, launch, 6);
+
+  auto out0 =
+      ops::XlaClusterOutput(scope.WithOpName("Out0"), Output(launch, 0));
+  auto out1 =
+      ops::XlaClusterOutput(scope.WithOpName("Out1"), Output(launch, 1));
+  auto out2 =
+      ops::XlaClusterOutput(scope.WithOpName("Out2"), Output(launch, 2));
+  auto out3 =
+      ops::XlaClusterOutput(scope.WithOpName("Out3"), Output(launch, 3));
+
+  auto consumer0_a = ops::Identity(scope.WithOpName("consumer0_a"), out0);
+  auto consumer0_b = ops::Identity(scope.WithOpName("consumer0_b"), out0);
+  auto consumer0_c = ops::Identity(scope.WithOpName("consumer0_c"), out0);
+  auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1);
+  auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2);
+  auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3);
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_CHECK_OK(scope.ToGraph(graph.get()));
+  return graph;
+}
+
+// Makes the expected encapsulated function body graph for use in tests.
+static std::unique_ptr<Graph> MakeBodyGraph() {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+
+  auto arg0 = ops::_Arg(scope.WithOpName("a_0_arg"), DT_INT32, 0);
+  auto arg1 = ops::_Arg(scope.WithOpName("b_0_arg"), DT_FLOAT, 1);
+  auto arg2 = ops::_Arg(scope.WithOpName("c_0_arg"), DT_INT32, 2);
+  auto arg3 = ops::_Arg(scope.WithOpName("d_0_arg"), DT_FLOAT, 3);
+
+  auto arg4 = ops::_Arg(scope.WithOpName("u_0_arg"), DT_RESOURCE, 4);
+  auto arg5 = ops::_Arg(scope.WithOpName("v_0_arg"), DT_RESOURCE, 5);
+  auto arg6 = ops::_Arg(scope.WithOpName("w_0_arg"), DT_RESOURCE, 6);
+
+  auto add_attrs = [](Node* node) {
+    node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+  };
+
+  auto b_identity = ops::Identity(scope.WithOpName("B_identity"), arg1);
+
+  auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), arg4, DT_FLOAT);
+  add_attrs(read_u.node());
+  auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), arg5, DT_FLOAT);
+  add_attrs(read_v.node());
+  auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), arg6, DT_FLOAT);
+  add_attrs(read_w.node());
+
+  auto e = ops::Add(scope.WithOpName("E"), arg0, arg2);
+  add_attrs(e.node());
+  auto f = ops::Add(scope.WithOpName("F"), read_v, read_w);
+  add_attrs(f.node());
+  auto g = ops::Add(scope.WithOpName("G"), f, arg3);
+  add_attrs(g.node());
+
+  auto out0 = ops::_Retval(scope.WithOpName("b_identity_0_retval_RetVal"),
+                           b_identity, 0);
+  auto out1 = ops::_Retval(scope.WithOpName("e_0_retval_RetVal"), e, 1);
+  auto out2 = ops::_Retval(scope.WithOpName("g_0_retval_RetVal"), g, 2);
+  auto out3 =
+      ops::_Retval(scope.WithOpName("readu_0_retval_RetVal"), read_u, 3);
+
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_CHECK_OK(scope.ToGraph(graph.get()));
+  return graph;
+}
+
+TEST(EncapsulateXlaComputations, DeterministicEncapsulate) {
+  // Test that control edge insertion order doesn't affect the cache key
+  // (cluster name) generated by the XLA encapsulate pass.
+  auto get_serialized_graph = [](bool control_input_reversed,
+                                 bool operand_reversed) -> string {
+    FunctionLibraryDefinition flib_def(OpRegistry::Global(), {});
+    std::unique_ptr<Graph> graph(new Graph(&flib_def));
+    {
+      Scope scope = Scope::NewRootScope().ExitOnError();
+      auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32);
+      auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32);
+
+      ops::Add e = operand_reversed ? ops::Add(scope.WithOpName("E"), a1, a0)
+                                    : ops::Add(scope.WithOpName("E"), a0, a1);
+
+      auto add_attrs = [](Node* node) {
+        node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr,
+                      "launch0");
+      };
+      add_attrs(e.node());
+
+      TF_CHECK_OK(scope.ToGraph(graph.get()));
+      auto get_node_in_graph = [&graph](Node* node) {
+        return graph->FindNodeId(node->id());
+      };
+      // Insert control edge in different order. The order should not affect
+      // the encapsulated or serialized graph.
+      if (!control_input_reversed) {
+        graph->AddControlEdge(get_node_in_graph(a0.node()),
+                              get_node_in_graph(e.node()), true);
+        graph->AddControlEdge(get_node_in_graph(a1.node()),
+                              get_node_in_graph(e.node()), true);
+      } else {
+        graph->AddControlEdge(get_node_in_graph(a1.node()),
+                              get_node_in_graph(e.node()), true);
+        graph->AddControlEdge(get_node_in_graph(a0.node()),
+                              get_node_in_graph(e.node()), true);
+      }
+    }
+    TF_CHECK_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def));
+    GraphDef gdef;
+    graph->ToGraphDef(&gdef);
+    // Before serialization, sort control inputs first to remove
+    // nondeterminism.
+    SortControlInputs(&gdef);
+    string serialized;
+    SerializeToStringDeterministic(gdef, &serialized);
+    return serialized;
+  };
+
+  // Changing the order of control input shouldn't affect the graph generated.
+  EXPECT_EQ(get_serialized_graph(/*control_input_reversed=*/true,
+                                 /*operand_reversed=*/false),
+            get_serialized_graph(/*control_input_reversed=*/false,
+                                 /*operand_reversed=*/false));
+
+  // Changing the order of data input should affect the graph generated.
+  EXPECT_NE(get_serialized_graph(/*control_input_reversed=*/false,
+                                 /*operand_reversed=*/true),
+            get_serialized_graph(/*control_input_reversed=*/false,
+                                 /*operand_reversed=*/false));
+}
+
+TEST(EncapsulateXlaComputations, Encapsulate) {
+  FunctionLibraryDefinition flib_def(OpRegistry::Global(), {});
+  std::unique_ptr<Graph> graph(new Graph(&flib_def));
+  {
+    Scope scope = Scope::NewRootScope().ExitOnError();
+    auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32);
+    auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT);
+    auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32);
+    auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT);
+    auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE);
+    auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
+    auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
+
+    auto add_attrs = [](Node* node) {
+      node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0");
+    };
+
+    auto b_identity = ops::Identity(scope.WithOpName("B_identity"), b);
+    add_attrs(b_identity.node());
+
+    auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), u, DT_FLOAT);
+    add_attrs(read_u.node());
+    auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), v, DT_FLOAT);
+    add_attrs(read_v.node());
+    auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), w, DT_FLOAT);
+    add_attrs(read_w.node());
+
+    auto e = ops::Add(scope.WithOpName("E"), a, c);
+    add_attrs(e.node());
+    auto f = ops::Add(scope.WithOpName("F"), read_v, read_w);
+    add_attrs(f.node());
+    auto g = ops::Add(scope.WithOpName("G"), f, d);
+    add_attrs(g.node());
+
+    auto out0 = ops::XlaClusterOutput(scope.WithOpName("Out0"), b_identity);
+    auto out1 = ops::XlaClusterOutput(scope.WithOpName("Out1"), e);
+    auto out2 = ops::XlaClusterOutput(scope.WithOpName("Out2"), g);
+    auto out3 = ops::XlaClusterOutput(scope.WithOpName("Out3"), read_u);
+
+    auto consumer0_a = ops::Identity(scope.WithOpName("consumer0_a"), out0);
+    auto consumer0_b = ops::Identity(scope.WithOpName("consumer0_b"), out0);
+    auto consumer0_c = ops::Identity(scope.WithOpName("consumer0_c"), out0);
+    auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1);
+    auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2);
+    auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3);
+    TF_ASSERT_OK(scope.ToGraph(graph.get()));
+  }
+
+  std::unique_ptr<Graph> graph_copy(new Graph(&flib_def));
+  CopyGraph(*graph, graph_copy.get());
+
+  TF_ASSERT_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def));
+
+  std::unordered_map<string, Node*> index = BuildNodeIndex(*graph);
+  string function = index.at("launch0")->type_string();
+
+  // Tests the outer graph is as expected.
+  {
+    std::unique_ptr<Graph> outer = MakeOuterGraph(flib_def, function);
+    GraphDef expected_def;
+    outer->ToGraphDef(&expected_def);
+
+    GraphDef actual_def;
+    graph->ToGraphDef(&actual_def);
+    TF_EXPECT_GRAPH_EQ_INTERNAL(expected_def, actual_def);
+  }
+
+  // Tests the encapsulated body graph is as expected.
+  {
+    std::unique_ptr<Graph> body = MakeBodyGraph();
+    GraphDef expected_body_def;
+    body->ToGraphDef(&expected_body_def);
+
+    InstantiationResultForTest result;
+    TF_EXPECT_OK(InstantiateFunctionForTest(function, flib_def, &result));
+
+    EXPECT_EQ((DataTypeVector{DT_INT32, DT_FLOAT, DT_INT32, DT_FLOAT,
+                              DT_RESOURCE, DT_RESOURCE, DT_RESOURCE}),
+              result.arg_types);
+    EXPECT_EQ((DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}),
+              result.ret_types);
+    TF_EXPECT_GRAPH_EQ(expected_body_def, result.gdef);
+  }
+
+  // Encapsulates the same computation again, verifies we reuse the same
+  // function. Encapsulation should be deterministic to avoid recompilation.
+  TF_ASSERT_OK(
+      EncapsulateXlaComputationsPass::Encapsulate(&graph_copy, &flib_def));
+  std::unordered_map<string, Node*> index_copy = BuildNodeIndex(*graph_copy);
+  string function_copy = index_copy.at("launch0")->type_string();
+  EXPECT_EQ(function, function_copy);
+}
+
+TEST(EncapsulateXlaComputations, BuildXlaLaunchOp) {
+  std::unique_ptr<Graph> body_graph = MakeBodyGraph();
+  FunctionDefLibrary flib;
+  TF_ASSERT_OK(GraphToFunctionDef(*body_graph, "launch0", flib.add_function()));
+
+  FunctionLibraryDefinition flib_def(OpRegistry::Global(), flib);
+
+  std::unique_ptr<Graph> graph = MakeOuterGraph(flib_def, "launch0");
+  TF_ASSERT_OK(EncapsulateXlaComputationsPass::BuildXlaLaunchOps(graph.get()));
+
+  Scope scope = Scope::DisabledShapeInferenceScope().ExitOnError();
+  TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib));
+
+  auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32);
+  auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT);
+  auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32);
+  auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT);
+  auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE);
+  auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE);
+  auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE);
+
+  NameAttrList function;
+  function.set_name("launch0");
+  auto launch = ops::XlaLaunch(
+      scope.WithOpName("launch0"), std::initializer_list<Input>{},
+      std::initializer_list<Input>{a, b, c, d},
+      std::initializer_list<Input>{u, v, w},
+      DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}, function);
+
+  auto consumer0_a =
+      ops::Identity(scope.WithOpName("consumer0_a"), launch.results[0]);
+  auto consumer0_b =
+      ops::Identity(scope.WithOpName("consumer0_b"), launch.results[0]);
+  auto consumer0_c =
+      ops::Identity(scope.WithOpName("consumer0_c"), launch.results[0]);
+  auto consumer1 =
+      ops::Identity(scope.WithOpName("consumer1"), launch.results[1]);
+  auto consumer2 =
+      ops::Identity(scope.WithOpName("consumer2"), launch.results[2]);
+  auto consumer3 =
+      ops::Identity(scope.WithOpName("consumer3"), launch.results[3]);
+
+  GraphDef expected_def;
+  TF_ASSERT_OK(scope.ToGraphDef(&expected_def));
+
+  GraphDef actual_def;
+  graph->ToGraphDef(&actual_def);
+  TF_EXPECT_GRAPH_EQ(expected_def, actual_def);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
index 5dcf754969..3770eea6d0 100644
--- a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
+++ b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/jit/build_xla_launch_ops_pass.h"
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
+#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h"
 #include "tensorflow/compiler/jit/mark_for_compilation_pass.h"
 #include "tensorflow/compiler/jit/partially_decluster_pass.h"
 #include "tensorflow/core/common_runtime/optimization_registry.h"
@@ -23,6 +24,11 @@ namespace tensorflow {
 
 // PRE_PLACEMENT passes:
 
+// EncapsulateXlaComputationsPass rewrites computations generated by the
+// xla.compile() Python code into XlaLaunch nodes.
+REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 26,
+                      EncapsulateXlaComputationsPass);
+
 // from
 // third_party/tensorflow/compiler/tf2xla/functionalize_control_flow_pass_registration.cc
 // FunctionalizeControlFlowPass: 27
@@ -32,7 +38,8 @@ namespace tensorflow {
 // control flow structure (XlaIf/XlaWhile). Following passes must
 // handle those FunctionDef correctly.
 
-// POST_REWRITE_FOR_EXEC passes:
+// POST_REWRITE_FOR_EXEC passes that support auto-clustering to enable XLA:
+
 REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 10,
                       MarkForCompilationPass);
 
diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc
index f2473d98ff..1a29c3caab 100644
--- a/tensorflow/compiler/jit/ops/xla_ops.cc
+++ b/tensorflow/compiler/jit/ops/xla_ops.cc
@@ -13,10 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
 
 namespace tensorflow {
 
+using shape_inference::InferenceContext;
+
 REGISTER_OP("XlaLaunch")
     .Input("constants: Tconstants")
     .Attr("Tconstants: list(type) >= 0")
@@ -32,4 +36,19 @@ REGISTER_OP("XlaLaunch")
     .SetIsStateful()
     .Doc("XLA Launch Op. For use by the XLA JIT only.");
 
+REGISTER_OP("XlaClusterOutput")
+    .Input("input: T")
+    // Note: when replication is supported, this op will have N outputs.
+    .Output("outputs: T")
+    .Attr("T: type")
+    .SetShapeFn([](InferenceContext* c) {
+      for (int i = 0; i < c->num_outputs(); ++i) {
+        c->set_output(i, c->input(0));
+      }
+      return Status::OK();
+    })
+    .Doc(
+        "Operator that connects the output of an XLA computation to other "
+        "consumer graph nodes.");
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index d549e7bb59..ba1e3b2b4f 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -611,6 +611,7 @@ cc_library(
         "//tensorflow/compiler/xla:status_macros",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
     ],
diff --git a/tensorflow/compiler/tf2xla/test_util.cc b/tensorflow/compiler/tf2xla/test_util.cc
index 3c6c9a91b6..f31bfb45a2 100644
--- a/tensorflow/compiler/tf2xla/test_util.cc
+++ b/tensorflow/compiler/tf2xla/test_util.cc
@@ -40,4 +40,12 @@ Status InstantiateFunctionForTest(const string& name,
   return Status::OK();
 }
 
+std::unordered_map<string, Node*> BuildNodeIndex(const Graph& graph) {
+  std::unordered_map<string, Node*> index;
+  for (Node* node : graph.nodes()) {
+    index[node->name()] = node;
+  }
+  return index;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/test_util.h b/tensorflow/compiler/tf2xla/test_util.h
index e6e4ae92ed..350a868568 100644
--- a/tensorflow/compiler/tf2xla/test_util.h
+++ b/tensorflow/compiler/tf2xla/test_util.h
@@ -24,8 +24,10 @@ limitations under the License.
 
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/graph_def_util.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/util/equal_graph_def.h"
 
 namespace tensorflow {
 
@@ -42,6 +44,20 @@ Status InstantiateFunctionForTest(const string& name,
                                   const FunctionLibraryDefinition& library,
                                   InstantiationResultForTest* result);
 
+// Builds a map from node name to Node* for every node in `graph`.
+std::unordered_map<string, Node*> BuildNodeIndex(const Graph& graph);
+
 }  // namespace tensorflow
 
+// Variant of TF_EXPECT_GRAPH_EQ that also compares internal attributes
+// (ignore_internal_attrs = false); on failure logs the diff and `actual`.
+#define TF_EXPECT_GRAPH_EQ_INTERNAL(expected, actual)               \
+  do {                                                              \
+    string diff;                                                    \
+    EqualGraphDefOptions eq_options;                                \
+    eq_options.ignore_internal_attrs = false;                       \
+    EXPECT_TRUE(EqualGraphDef(actual, expected, &diff, eq_options)) \
+        << diff << "\nActual: " << SummarizeGraphDef(actual);       \
+  } while (false)
+
 #endif  // TENSORFLOW_COMPILER_TF2XLA_TEST_UTIL_H_
diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc
index 7f260b3139..4475fa979e 100644
--- a/tensorflow/core/common_runtime/graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/graph_execution_state.cc
@@ -561,6 +561,10 @@ Status GraphExecutionState::OptimizeGraph(
     grappler::GrapplerItem item;
     item.id = "tf_graph";
     graph_->ToGraphDef(&item.graph);
+    // TODO(b/114748242): Add a unit test to test this bug fix.
+    if (flib_def_) {
+      *item.graph.mutable_library() = flib_def_->ToProto();
+    }
 
     item.fetch.insert(item.fetch.end(),
                       options.callable_options.fetch().begin(),
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 8c99598748..7ed4a67333 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -72,6 +72,16 @@ bool IsRunOnceOptimizer(const string& name) {
          name == "loop_optimizer";
 }
 
+// Returns true if any top-level node in `def` is an op indicating TPU use.
+bool IsTPUGraphDef(const GraphDef& def) {
+  for (const auto& node : def.node()) {
+    if (node.op() == "TPUCompile" || node.op() == "TPUPartitionedCall") {
+      return true;
+    }
+  }
+  return false;
+}
+
 }  // namespace
 
 #define MK_OPT(NAME, VALUE) \
@@ -338,6 +348,19 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph));
   VLOG(1) << "Optimized main graph.";
 
+  // Skip optimizing functions if this is a TPU graph. Currently, Grappler
+  // passes do not handle TPU functions correctly in a variety of ways (Note
+  // that due to the pre-placement TPU graph rewriting passes, the TPU-related
+  // ops are encapsulated away into functions). For example, TPU graphs contain
+  // TPUReplicateMetadata node that carries relevant TPU metadata and Grappler
+  // passes could prune that away. Grappler passes could also cause issues
+  // around shape inference. Since the desired and existing behavior is to not
+  // optimize TPU functions with Grappler, this check preserves that.
+  if (IsTPUGraphDef(*optimized_graph)) {
+    VLOG(2) << "Skipping optimizing funcs for TPU graphs";
+    return Status::OK();
+  }
+
   // 2. Optimize function library
   FunctionLibraryDefinition flib(OpRegistry::Global(),
                                  optimized_graph->library());
-- 
GitLab


From 8f9413bf41ff89672a3415eef606ecaca7c70a2f Mon Sep 17 00:00:00 2001
From: Mihai Maruseac 
Date: Thu, 13 Sep 2018 16:15:32 -0700
Subject: [PATCH 0175/1357] Ensure that the input image of decode.bmp.op has
 valid dimensions.

This prevents an undefined behavior with signed integer overflow in
decode.bmp.op.

PiperOrigin-RevId: 212897289
---
 tensorflow/core/kernels/decode_bmp_op.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc
index 750efca592..ae451be7e2 100644
--- a/tensorflow/core/kernels/decode_bmp_op.cc
+++ b/tensorflow/core/kernels/decode_bmp_op.cc
@@ -91,8 +91,10 @@ class DecodeBmpOp : public OpKernel {
                 errors::InvalidArgument(
                     "Number of channels must be 1, 3 or 4, was ", channels_));
 
-    OP_REQUIRES(context, width > 0 && header_size >= 0,
+    OP_REQUIRES(context, width > 0,
                 errors::InvalidArgument("Width must be positive"));
+    OP_REQUIRES(context, height != 0,
+                errors::InvalidArgument("Height must be nonzero"));
     OP_REQUIRES(context, header_size >= 0,
                 errors::InvalidArgument("header size must be nonnegative"));
 
-- 
GitLab


From 5dd20118a25e8d29b7684cf5fb17951657a4a687 Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Thu, 13 Sep 2018 16:18:18 -0700
Subject: [PATCH 0176/1357] Convert logdir paths to strings.

This supports pathlib and other non-string path types.

PiperOrigin-RevId: 212897666
---
 tensorflow/python/ops/summary_ops_v2.py               | 1 +
 tensorflow/python/summary/writer/event_file_writer.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py
index 94c7d88b5c..a404507627 100644
--- a/tensorflow/python/ops/summary_ops_v2.py
+++ b/tensorflow/python/ops/summary_ops_v2.py
@@ -234,6 +234,7 @@ def create_file_writer(logdir,
   """
   if logdir is None:
     return SummaryWriter(None, None)
+  logdir = str(logdir)
   with ops.device("cpu:0"):
     if max_queue is None:
       max_queue = constant_op.constant(10)
diff --git a/tensorflow/python/summary/writer/event_file_writer.py b/tensorflow/python/summary/writer/event_file_writer.py
index 2936a279bd..14dec982a6 100644
--- a/tensorflow/python/summary/writer/event_file_writer.py
+++ b/tensorflow/python/summary/writer/event_file_writer.py
@@ -62,7 +62,7 @@ class EventFileWriter(object):
       filename_suffix: A string. Every event file's name is suffixed with
         `filename_suffix`.
     """
-    self._logdir = logdir
+    self._logdir = str(logdir)
     if not gfile.IsDirectory(self._logdir):
       gfile.MakeDirs(self._logdir)
     self._event_queue = six.moves.queue.Queue(max_queue)
-- 
GitLab


From 3b438e4a24dd0f113f1d36d97196a027bd473fc4 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal 
Date: Thu, 13 Sep 2018 16:42:57 -0700
Subject: [PATCH 0177/1357] [tf.data] Changes `make_batched_features_dataset`
 and `make_tf_record_dataset` default `prefetch` buffer size to auto-tune
 (from 1).

PiperOrigin-RevId: 212900920
---
 tensorflow/contrib/data/__init__.py           |  9 +++--
 tensorflow/contrib/data/python/ops/BUILD      |  1 +
 .../contrib/data/python/ops/optimization.py   |  3 ++
 tensorflow/contrib/data/python/ops/readers.py | 39 +++++++++----------
 .../core/kernels/data/prefetch_autotuner.cc   | 13 ++++++-
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index baec238c62..c378b1ce8d 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -62,6 +62,8 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@sloppy_interleave
 @@unbatch
 @@unique
+
+@@AUTOTUNE
 """
 
 from __future__ import absolute_import
@@ -91,6 +93,10 @@ from tensorflow.contrib.data.python.ops.interleave_ops import sample_from_datase
 from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave
 from tensorflow.contrib.data.python.ops.iterator_ops import CheckpointInputPipelineHook
 from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator
+
+# Optimization constant that can be used to enable auto-tuning.
+from tensorflow.contrib.data.python.ops.optimization import AUTOTUNE
+
 from tensorflow.contrib.data.python.ops.parsing_ops import parse_example_dataset
 from tensorflow.contrib.data.python.ops.prefetching_ops import copy_to_device
 from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device
@@ -113,6 +119,3 @@ from tensorflow.python.data.ops.optional_ops import Optional
 
 from tensorflow.python.util.all_util import remove_undocumented
 remove_undocumented(__name__)
-
-# A constant that can be used to enable auto-tuning.
-AUTOTUNE = -1
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 4b45cc7e36..a14781cd93 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -80,6 +80,7 @@ py_library(
         ":batching",
         ":gen_dataset_ops",
         ":interleave_ops",
+        ":optimization",
         ":parsing_ops",
         ":shuffle_ops",
         "//tensorflow/python:constant_op",
diff --git a/tensorflow/contrib/data/python/ops/optimization.py b/tensorflow/contrib/data/python/ops/optimization.py
index 4114b62e29..73840452df 100644
--- a/tensorflow/contrib/data/python/ops/optimization.py
+++ b/tensorflow/contrib/data/python/ops/optimization.py
@@ -24,6 +24,9 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_dataset_ops
 
+# A constant that can be used to enable auto-tuning.
+AUTOTUNE = -1
+
 
 # TODO(jsimsa): Support RE matching for both individual transformation (e.g. to
 # account for indexing) and transformation sequence.
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index 4c466781f7..785b395707 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops
 from tensorflow.contrib.data.python.ops import interleave_ops
+from tensorflow.contrib.data.python.ops import optimization
 from tensorflow.contrib.data.python.ops import parsing_ops
 from tensorflow.contrib.data.python.ops import shuffle_ops
 from tensorflow.python.data.ops import dataset_ops
@@ -214,18 +215,17 @@ def _maybe_shuffle_and_repeat(
   return dataset
 
 
-def make_tf_record_dataset(
-    file_pattern,
-    batch_size,
-    parser_fn=None,
-    num_epochs=None,
-    shuffle=True,
-    shuffle_buffer_size=None,
-    shuffle_seed=None,
-    prefetch_buffer_size=None,
-    num_parallel_reads=None,
-    num_parallel_parser_calls=None,
-    drop_final_batch=False):
+def make_tf_record_dataset(file_pattern,
+                           batch_size,
+                           parser_fn=None,
+                           num_epochs=None,
+                           shuffle=True,
+                           shuffle_buffer_size=None,
+                           shuffle_seed=None,
+                           prefetch_buffer_size=optimization.AUTOTUNE,
+                           num_parallel_reads=None,
+                           num_parallel_parser_calls=None,
+                           drop_final_batch=False):
   """Reads and optionally parses TFRecord files into a dataset.
 
   Provides common functionality such as batching, optional parsing, shuffling,
@@ -300,8 +300,6 @@ def make_tf_record_dataset(
         parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
         drop_remainder=drop_final_batch))
 
-  if prefetch_buffer_size is None:
-    prefetch_buffer_size = -1  # tf.config.data.AUTOTUNE
   if prefetch_buffer_size == 0:
     return dataset
   else:
@@ -323,7 +321,7 @@ def make_csv_dataset(
     shuffle=True,
     shuffle_buffer_size=10000,
     shuffle_seed=None,
-    prefetch_buffer_size=1,
+    prefetch_buffer_size=optimization.AUTOTUNE,
     num_parallel_reads=1,
     sloppy=False,
     num_rows_for_inference=100,
@@ -386,9 +384,10 @@ def make_csv_dataset(
     shuffle_buffer_size: Buffer size to use for shuffling. A large buffer size
       ensures better shuffling, but increases memory usage and startup time.
     shuffle_seed: Randomization seed to use for shuffling.
-    prefetch_buffer_size: An int specifying the number of feature batches to
-      prefetch for performance improvement. Recommended value is the number of
-      batches consumed per training step.
+    prefetch_buffer_size: An int specifying the number of feature
+      batches to prefetch for performance improvement. Recommended value is the
+      number of batches consumed per training step. Defaults to auto-tune.
+
     num_parallel_reads: Number of threads used to read CSV records from files.
       If >1, the results will be interleaved.
     sloppy: If `True`, reading performance will be improved at
@@ -666,7 +665,7 @@ def make_batched_features_dataset(file_pattern,
                                   shuffle=True,
                                   shuffle_buffer_size=10000,
                                   shuffle_seed=None,
-                                  prefetch_buffer_size=1,
+                                  prefetch_buffer_size=optimization.AUTOTUNE,
                                   reader_num_threads=1,
                                   parser_num_threads=2,
                                   sloppy_ordering=False,
@@ -739,7 +738,7 @@ def make_batched_features_dataset(file_pattern,
     shuffle_seed: Randomization seed to use for shuffling.
     prefetch_buffer_size: Number of feature batches to prefetch in order to
       improve performance. Recommended value is the number of batches consumed
-      per training step (default is 1).
+      per training step. Defaults to auto-tune.
     reader_num_threads: Number of threads used to read `Example` records. If >1,
       the results will be interleaved.
     parser_num_threads: Number of threads to use for parsing `Example` tensors
diff --git a/tensorflow/core/kernels/data/prefetch_autotuner.cc b/tensorflow/core/kernels/data/prefetch_autotuner.cc
index 533d0bd5d2..da357339c9 100644
--- a/tensorflow/core/kernels/data/prefetch_autotuner.cc
+++ b/tensorflow/core/kernels/data/prefetch_autotuner.cc
@@ -26,6 +26,13 @@ PrefetchAutotuner::PrefetchAutotuner(int64 initial_buffer_size)
   }
 }
 
+namespace {
+// Determines what strategy to use for increasing the buffer size limit. For
+// limits less than the threshold, an exponential increase is used, while for
+// limits greater than or equal to the threshold, a linear increase is used.
+size_t kBufferLimitThreshold = 2048;
+}  // namespace
+
 void PrefetchAutotuner::RecordConsumption(size_t current_buffer_size) {
   switch (mode_) {
     case Mode::kDisabled:
@@ -37,7 +44,11 @@ void PrefetchAutotuner::RecordConsumption(size_t current_buffer_size) {
       return;
     case Mode::kDownswing:
       if (current_buffer_size == 0) {
-        buffer_limit_ *= 2;  // Increase the buffer size.
+        if (buffer_limit_ >= kBufferLimitThreshold) {
+          buffer_limit_ += kBufferLimitThreshold;
+        } else {
+          buffer_limit_ *= 2;
+        }
         mode_ = Mode::kUpswing;
       }
       return;
-- 
GitLab


From 4137d84a3b41638d4048e45ab579662c18a06df5 Mon Sep 17 00:00:00 2001
From: Priya Gupta 
Date: Thu, 13 Sep 2018 16:45:11 -0700
Subject: [PATCH 0178/1357] Use `dataset.batch(.., drop_remainder=True)`
 instead of map_and_batch to achieve the same effect.

PiperOrigin-RevId: 212901207
---
 tensorflow/contrib/distribute/python/BUILD                  | 1 -
 tensorflow/contrib/distribute/python/single_loss_example.py | 6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 87f76eaa94..aaecbb0eb1 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -485,7 +485,6 @@ py_library(
     srcs = ["single_loss_example.py"],
     deps = [
         ":step_fn",
-        "//tensorflow/contrib/data/python/ops:batching",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:layers",
diff --git a/tensorflow/contrib/distribute/python/single_loss_example.py b/tensorflow/contrib/distribute/python/single_loss_example.py
index 5aa19cf6a9..09b351ffa4 100644
--- a/tensorflow/contrib/distribute/python/single_loss_example.py
+++ b/tensorflow/contrib/distribute/python/single_loss_example.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.data.python.ops import batching
 from tensorflow.contrib.distribute.python import step_fn
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -59,10 +58,9 @@ def minimize_loss_example(optimizer_fn,
 
   def dataset_fn():
     dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
-    # TODO(isaprykin): map_and_batch with drop_remainder causes shapes to be
+    # TODO(isaprykin): batch with drop_remainder causes shapes to be
     # fully defined for TPU.  Remove this when XLA supports dynamic shapes.
-    return dataset.apply(
-        batching.map_and_batch(lambda x: x, batch_size=1, drop_remainder=True))
+    return dataset.batch(1, drop_remainder=True)
 
   # An Optimizer instance is created either outside or inside model_fn.
   outer_optimizer = None
-- 
GitLab


From 4b42a284683416ab6159f32c903321af9dc9a591 Mon Sep 17 00:00:00 2001
From: Jared Duke 
Date: Thu, 13 Sep 2018 16:58:34 -0700
Subject: [PATCH 0179/1357] Reland "Add basic type propagation for unsupported
 ops in TFLite conversion"

The original CL was rolled back due to op registration conflicts in the pip.
Resolve the issue by only including core:ops in the toco binary itself, not in intermediate libraries.

PiperOrigin-RevId: 212902838
---
 tensorflow/contrib/lite/toco/BUILD            |  6 +-
 .../contrib/lite/toco/import_tensorflow.cc    | 20 +++++
 .../lite/toco/import_tensorflow_test.cc       | 75 +++++++++++++++++--
 3 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD
index bea90f1ce8..96b88b60fc 100644
--- a/tensorflow/contrib/lite/toco/BUILD
+++ b/tensorflow/contrib/lite/toco/BUILD
@@ -347,6 +347,7 @@ tf_cc_test(
         "//tensorflow/core:framework",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
+        "//tensorflow/core:ops",
         "//tensorflow/core:protos_all_cc",
         "@com_google_googletest//:gtest_main",
     ],
@@ -407,8 +408,11 @@ tf_cc_binary(
         ":toco_port",
         ":toco_tooling",
         ":types_proto_cc",
-        "//tensorflow/core:lib",
         "@com_google_absl//absl/strings",
+        "//tensorflow/core:lib",
+        # We cannot embed the core:ops dependency directly into :toco_tooling as
+        # it can conflict with downstream deps when toco is used as a library.
+        "//tensorflow/core:ops",
     ],
 )
 
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc
index 9bc23c4b3c..efc1007925 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc
@@ -58,6 +58,7 @@ using tensorflow::DT_STRING;
 using tensorflow::DT_UINT8;
 using tensorflow::GraphDef;
 using tensorflow::NodeDef;
+using tensorflow::OpRegistry;
 using tensorflow::TensorProto;
 using tensorflow::TensorShapeProto;
 
@@ -1079,6 +1080,25 @@ tensorflow::Status ConvertUnsupportedOperator(
   } else if (HasAttr(node, "Tout")) {
     const auto& output_type = GetDataTypeAttr(node, "Tout");
     op->output_data_types.push_back(ConvertDataType(output_type));
+  } else {
+    const tensorflow::OpDef* op_def = nullptr;
+    if (OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) {
+      for (const auto& output_arg : op_def->output_arg()) {
+        if (HasAttr(node, output_arg.type_attr())) {
+          op->output_data_types.push_back(
+              ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr())));
+        } else {
+          LOG(INFO) << "Op node missing output type attribute: " << node.name();
+          op->output_data_types.clear();
+          break;
+        }
+      }
+    }
+    if (op->output_data_types.empty()) {
+      // TODO(b/113613439): Figure out how to propagate types for custom ops
+      // that have no OpDef.
+      LOG(INFO) << "Unable to determine output type for op: " << node.op();
+    }
   }
   if (HasAttr(node, kAttrOutputShapes)) {
     const auto& output_shapes = GetListAttr(node, kAttrOutputShapes);
diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
index a00e136dd6..da248826a7 100644
--- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc
@@ -49,6 +49,17 @@ Status ImportTensorFlowNode(const NodeDef&, const TensorFlowImportFlags&,
 
 namespace {
 
+Status ImportNode(const NodeDef& node, Model* model) {
+  const auto converter = internal::GetTensorFlowNodeConverterMap();
+  return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model,
+                                        converter);
+}
+
+Status ImportNode(const NodeDef& node) {
+  Model model;
+  return ImportNode(node, &model);
+}
+
 class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
  protected:
   ShapeImportTest() {}
@@ -109,12 +120,24 @@ class ShapeImportTest : public ::testing::TestWithParam {
     SetAttrValue(t, &value_attr);
     (*node->mutable_attr())["value"] = value_attr;
   }
+};
+
+class TypeImportTest : public ::testing::TestWithParam<
+                           std::pair<tensorflow::DataType, ArrayDataType>> {
+ protected:
+  TypeImportTest() {}
+
+  void BuildUnaryNode(const std::string& op_name, tensorflow::DataType dtype,
+                      NodeDef* node) {
+    node->set_op(op_name);
+    node->set_name("Node1");
+
+    node->add_input();
+    node->set_input(0, "Node0");
 
-  Status ImportNode(const NodeDef& node) {
-    Model model;
-    const auto converter = internal::GetTensorFlowNodeConverterMap();
-    return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), &model,
-                                          converter);
+    AttrValue dtype_attr;
+    SetAttrValue(dtype, &dtype_attr);
+    (*node->mutable_attr())["T"] = dtype_attr;
   }
 };
 
@@ -167,5 +190,47 @@ TEST_P(ShapeImportTest, ValidShapeButZeroElements) {
 INSTANTIATE_TEST_CASE_P(ValidShapeButZeroElements, ShapeImportTest,
                         ::testing::ValuesIn(TestTypes()));
 
+std::vector<std::pair<tensorflow::DataType, ArrayDataType>> UnaryTestTypes() {
+  return {{DT_FLOAT, ArrayDataType::kFloat},
+          {DT_INT32, ArrayDataType::kInt32},
+          {DT_INT64, ArrayDataType::kInt64}};
+}
+
+TEST_P(TypeImportTest, BasicTypeInference) {
+  NodeDef node;
+  BuildUnaryNode("Atan", GetParam().first, &node);
+
+  Model model;
+  EXPECT_TRUE(ImportNode(node, &model).ok());
+
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+  ASSERT_THAT(op->output_data_types, ::testing::ElementsAre(GetParam().second));
+}
+INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest,
+                        ::testing::ValuesIn(UnaryTestTypes()));
+
+TEST(ImportTest, FailedTypeInference) {
+  // Create a unary op with no Type ("T") annotation.
+  NodeDef node;
+  node.set_op("Atan");
+  node.set_name("Node1");
+  node.add_input();
+  node.set_input(0, "Node0");
+
+  Model model;
+  EXPECT_TRUE(ImportNode(node, &model).ok());
+
+  ASSERT_THAT(model.operators.size(), ::testing::Ge(1));
+  ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported);
+  const TensorFlowUnsupportedOperator* op =
+      static_cast<const TensorFlowUnsupportedOperator*>(
+          model.operators[0].get());
+  ASSERT_TRUE(op->output_data_types.empty());
+}
+
 }  // namespace
 }  // namespace toco
-- 
GitLab


From 97511100c88010d4e57a78685b476b4f8821059e Mon Sep 17 00:00:00 2001
From: Tim Shen 
Date: Thu, 13 Sep 2018 17:17:30 -0700
Subject: [PATCH 0180/1357] Simplify the initialization function in algorithm
 picker. No functional change.

PiperOrigin-RevId: 212905536
---
 .../gpu/cudnn_convolution_algorithm_picker.cc | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
index c607aea1a8..f528e62b17 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc
@@ -221,25 +221,12 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
     allocator = &*se_allocator;
   }
 
-  // Allocate space for the input, filter, and output of the convolution.  We
-  // use a ScratchAllocator for this instead of calling allocator_ directly so
-  // that our allocations don't leak.
-  ScratchAllocator input_output_allocator(device_ordinal, allocator);
-  TF_ASSIGN_OR_RETURN(params.input_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(input_shape)));
-  TF_ASSIGN_OR_RETURN(params.filter_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(filter_shape)));
-  TF_ASSIGN_OR_RETURN(params.output_buf,
-                      input_output_allocator.AllocateBytes(
-                          &stream, ShapeUtil::ByteSizeOf(output_shape)));
-
-  if (cross_check_enabled) {
-    // Broadcast a constant to the buffer, instead of zeroing the buffer. A
-    // non-zero constant is useful for the cross checking, because zero-inputs
-    // may not always reveal the bugs.
-    const auto initialize_f16 = [&stream](DeviceMemoryBase buffer) {
+  const auto initialize_buffer = [&stream, cross_check_enabled](
+                                     DeviceMemoryBase buffer) {
+    if (cross_check_enabled) {
+      // Broadcast a constant to the buffer, instead of zeroing the buffer. A
+      // non-zero constant is useful for the cross checking, because zero-inputs
+      // may not always reveal the bugs.
       CHECK_EQ(0, (uintptr_t)buffer.opaque() % 4);
       size_t left_over_bytes = buffer.size() % 4;
       CHECK_EQ(0, left_over_bytes % 2);
@@ -257,19 +244,32 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm(
       DeviceMemoryBase left_over(
           static_cast<char*>(buffer.opaque()) + aligned_size, left_over_bytes);
       stream.ThenMemcpy(&left_over, halfs, left_over_bytes);
-    };
-    initialize_f16(params.input_buf);
-    initialize_f16(params.filter_buf);
-    initialize_f16(params.output_buf);
-  } else {
-    // Although we don't have evidence this matters, zero out the buffers before
-    // autotuning.  It's conceivable that using uninitialized memory as the
-    // inputs might affect performance if e.g. the inputs contain denormals, and
-    // this is easy enough.
-    stream.ThenMemZero(&params.input_buf, params.input_buf.size())
-        .ThenMemZero(&params.filter_buf, params.filter_buf.size())
-        .ThenMemZero(&params.output_buf, params.output_buf.size());
-  }
+    } else {
+      // Although we don't have evidence this matters, zero out the buffers
+      // before autotuning.  It's conceivable that using uninitialized memory as
+      // the inputs might affect performance if e.g. the inputs contain
+      // denormals, and this is easy enough.
+      stream.ThenMemZero(&buffer, buffer.size());
+    }
+  };
+
+  // Allocate space for the input, filter, and output of the convolution.  We
+  // use a ScratchAllocator for this instead of calling allocator_ directly so
+  // that our allocations don't leak.
+  ScratchAllocator input_output_allocator(device_ordinal, allocator);
+  TF_ASSIGN_OR_RETURN(params.input_buf,
+                      input_output_allocator.AllocateBytes(
+                          &stream, ShapeUtil::ByteSizeOf(input_shape)));
+  TF_ASSIGN_OR_RETURN(params.filter_buf,
+                      input_output_allocator.AllocateBytes(
+                          &stream, ShapeUtil::ByteSizeOf(filter_shape)));
+  TF_ASSIGN_OR_RETURN(params.output_buf,
+                      input_output_allocator.AllocateBytes(
+                          &stream, ShapeUtil::ByteSizeOf(output_shape)));
+
+  initialize_buffer(params.input_buf);
+  initialize_buffer(params.filter_buf);
+  initialize_buffer(params.output_buf);
 
   DeviceMemoryBase* result_buf = [&] {
     switch (params.kind) {
-- 
GitLab


From 2e11d827d656a671757d386881e925c97f0b3d9c Mon Sep 17 00:00:00 2001
From: Pavithra Vijay 
Date: Thu, 13 Sep 2018 17:39:47 -0700
Subject: [PATCH 0181/1357] Fix performance issue when training with keras
 model in eager mode.

PiperOrigin-RevId: 212908218
---
 tensorflow/python/keras/engine/training.py    | 37 +++++++++++++------
 .../python/keras/engine/training_test.py      | 19 ++++++++++
 2 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index c6749468c8..fed07c4120 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -209,8 +209,27 @@ class Model(Network):
     for metric in metrics:
       metric_fn = training_utils.get_metric_function(
           metric, output_shape=output_shape, loss_fn=loss_fn)
-      metric_name = self._get_metric_name(
-          metric, output_index, weighted=weights is not None)
+
+      if (context.executing_eagerly() and y_true is not None and
+          y_pred is not None):
+        # In eager mode, when executing metric_fn during training, we do not
+        # need to generate unique metric name and add it to the model
+        # as we have done that during compile already.
+        prefix = 'weighted_' if weights is not None else ''
+        suffix = metric_fn.name if hasattr(metric_fn,
+                                           'name') else metric_fn.__name__
+        metric_name = prefix + suffix
+      else:
+        # Get metric name that is to be added to the model.
+        metric_name = self._get_metric_name(
+            metric, output_index, weighted=weights is not None)
+        # Keep track of metric name.
+        self.metrics_names.append(metric_name)
+
+        # Keep track of stateful metric attributes (name and metric function).
+        if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful:
+          self.stateful_metric_names.append(metric_name)
+          self.stateful_metric_functions.append(metric_fn)
 
       with K.name_scope(metric_name):
         # If both outputs and targets are available, call the metric function.
@@ -250,16 +269,10 @@ class Model(Network):
             self.metrics_tensors.append(metric_result)
           metric_results.append(metric_result)
 
-      # Keep track of metric name.
-      self.metrics_names.append(metric_name)
-
-      # Keep track of stateful metric attributes (name and metric function).
-      if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful:
-        self.stateful_metric_names.append(metric_name)
-        self.stateful_metric_functions.append(metric_fn)
-        if not context.executing_eagerly():
-          # Keep track of updates created by stateful metrics.
-          self.metrics_updates += metric_fn.updates
+      if (isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful and
+          not context.executing_eagerly()):
+        # Keep track of updates created by stateful metrics.
+        self.metrics_updates += metric_fn.updates
     return metric_results
 
   def _handle_metrics(self,
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 380130095b..30be4131a4 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -2256,7 +2256,26 @@ class TestTrainingWithMetrics(test.TestCase):
         'dense_binary_accuracy', 'dropout_mean_squared_error',
         'dropout_binary_accuracy'
     ]
+    reference_stateful_metric_names = [
+        'dense_binary_accuracy', 'dropout_binary_accuracy'
+    ]
+    self.assertEqual(reference_metric_names, model.metrics_names)
+    self.assertEqual(reference_stateful_metric_names,
+                     model.stateful_metric_names)
+
+    # Verify that model metric names are not altered during training.
+    input_a_np = np.random.random((10, 3))
+    input_b_np = np.random.random((10, 3))
+
+    output_d_np = np.random.random((10, 4))
+    output_e_np = np.random.random((10, 4))
+
+    model.fit([input_a_np, input_b_np], [output_d_np, output_e_np],
+              epochs=1,
+              batch_size=5)
     self.assertEqual(reference_metric_names, model.metrics_names)
+    self.assertEqual(reference_stateful_metric_names,
+                     model.stateful_metric_names)
 
   @tf_test_util.run_in_graph_and_eager_modes
   def test_metrics_correctness(self):
-- 
GitLab


From eb5cd6926ef8d2a5a748f1aa978e51148e22dd97 Mon Sep 17 00:00:00 2001
From: Francois Chollet 
Date: Thu, 13 Sep 2018 18:19:50 -0700
Subject: [PATCH 0182/1357] Make Keras relu use nn.leaky_relu when appropriate.

PiperOrigin-RevId: 212912615
---
 tensorflow/python/keras/backend.py                    |  3 +++
 tensorflow/python/keras/backend_test.py               |  3 ++-
 .../python/keras/layers/advanced_activations.py       | 11 +++++------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 529b07dc12..6f766c6257 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3462,6 +3462,9 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   clip_max = max_value is not None
 
   if alpha != 0.:
+    if max_value is None and threshold == 0:
+      return nn.leaky_relu(x, alpha=alpha)
+
     if threshold != 0:
       negative_part = nn.relu(-x + threshold)
     else:
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 2f271c4f50..ab71589940 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -522,8 +522,9 @@ class BackendLinearAlgebraTest(test.TestCase):
       relu_op = keras.backend.relu(x)
       self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 7]])
 
-      # alpha
+      # alpha (leaky relu used)
       relu_op = keras.backend.relu(x, alpha=0.5)
+      self.assertTrue('LeakyRelu' in relu_op.name)
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]])
 
       # max_value < some elements
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 61ab69c16f..731d180a80 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -268,7 +268,7 @@ class Softmax(Layer):
     self.axis = axis
 
   def call(self, inputs):
-    return activations.softmax(inputs, axis=self.axis)
+    return K.softmax(inputs, axis=self.axis)
 
   def get_config(self):
     config = {'axis': self.axis}
@@ -322,11 +322,10 @@ class ReLU(Layer):
   def call(self, inputs):
     # alpha is used for leaky relu slope in activations instead of
     # negative_slope.
-    return activations.relu(
-        inputs,
-        alpha=self.negative_slope,
-        max_value=self.max_value,
-        threshold=self.threshold)
+    return K.relu(inputs,
+                  alpha=self.negative_slope,
+                  max_value=self.max_value,
+                  threshold=self.threshold)
 
   def get_config(self):
     config = {
-- 
GitLab


From 1831ef73ba693ba7f27a3ecb391b47601e6a3758 Mon Sep 17 00:00:00 2001
From: Chris Leary 
Date: Thu, 13 Sep 2018 18:34:29 -0700
Subject: [PATCH 0183/1357] [XLA] Add hook for dump directory expansion.

Also puts a ".unoptimized" suffix on dumped HLO protobuf files
to avoid the unoptimized dumped HLO protobuf colliding with the
optimized dumped HLO protobufs when the same dump directory is
specified for both.

PiperOrigin-RevId: 212914100
---
 tensorflow/compiler/xla/BUILD                 |  1 +
 tensorflow/compiler/xla/protobuf_util.cc      | 29 +++++++++++++++++--
 tensorflow/compiler/xla/protobuf_util.h       |  4 +++
 .../xla/service/compile_only_service.cc       |  2 +-
 tensorflow/compiler/xla/service/service.cc    |  7 +++--
 tensorflow/compiler/xla/service/service.h     |  4 ++-
 6 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 76e36f3c46..ef70c1f8ac 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -193,6 +193,7 @@ cc_library(
         ":types",
         ":util",
         "//tensorflow/core:lib",
+        "@com_google_absl//absl/synchronization",
     ],
 )
 
diff --git a/tensorflow/compiler/xla/protobuf_util.cc b/tensorflow/compiler/xla/protobuf_util.cc
index 787725e884..b507a2ef79 100644
--- a/tensorflow/compiler/xla/protobuf_util.cc
+++ b/tensorflow/compiler/xla/protobuf_util.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/protobuf.h"
 
 namespace xla {
@@ -49,16 +50,40 @@ string SanitizeFilename(const string& file_name) {
   return safe_file_name;
 }
 
+std::pair<tensorflow::mutex*, std::vector<std::function<string(string)>>*>
+GetDirectoryExpanders() {
+  static auto* mutex = new tensorflow::mutex;
+  static auto* singleton = new std::vector<std::function<string(string)>>;
+  return {mutex, singleton};
+}
+
+// Runs all the directory expanders over x and returns the result.
+string Expand(string x) {
+  auto pair = GetDirectoryExpanders();
+  tensorflow::mutex_lock lock(*pair.first);
+  for (const auto& f : *pair.second) {
+    x = f(x);
+  }
+  return x;
+}
+
 }  // namespace
 
 Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
                             const string& directory, const string& file_name) {
   tensorflow::Env* env = tensorflow::Env::Default();
-  TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory));
+  string expanded_dir = Expand(directory);
+  TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(expanded_dir));
   string safe_file_name = SanitizeFileName(file_name) + ".pb";
-  const string path = tensorflow::io::JoinPath(directory, safe_file_name);
+  const string path = tensorflow::io::JoinPath(expanded_dir, safe_file_name);
   return tensorflow::WriteBinaryProto(env, path, message);
 }
 
+void RegisterDirectoryExpander(const std::function<string(string)>& expander) {
+  auto pair = GetDirectoryExpanders();
+  tensorflow::mutex_lock lock(*pair.first);
+  pair.second->push_back(expander);
+}
+
 }  // namespace protobuf_util
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/protobuf_util.h b/tensorflow/compiler/xla/protobuf_util.h
index 3667621367..f22fc8b849 100644
--- a/tensorflow/compiler/xla/protobuf_util.h
+++ b/tensorflow/compiler/xla/protobuf_util.h
@@ -39,6 +39,10 @@ extern bool ProtobufEquals(const tensorflow::protobuf::Message& m1,
 Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
                             const string& directory, const string& file_name);
 
+// Registers a function that may either expand a dirpath or forward the original
+// dirpath along as-is.
+void RegisterDirectoryExpander(const std::function<string(string)>& expander);
+
 }  // namespace protobuf_util
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc
index e5a6c28478..96bd2616f5 100644
--- a/tensorflow/compiler/xla/service/compile_only_service.cc
+++ b/tensorflow/compiler/xla/service/compile_only_service.cc
@@ -97,7 +97,7 @@ CompileOnlyService::CompileAheadOfTime(
     TF_ASSIGN_OR_RETURN(
         std::unique_ptr<HloModule> hlo_module,
         HloModule::CreateFromProto(instance.computation, *module_config));
-    TF_RETURN_IF_ERROR(MaybeDumpHloModule(*hlo_module));
+    TF_RETURN_IF_ERROR(MaybeDumpUnoptimizedHloModule(*hlo_module));
     hlo_modules.push_back(std::move(hlo_module));
   }
 
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 922ebdf0e3..b27a92f2a0 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -812,7 +812,7 @@ StatusOr> Service::BuildExecutable(
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
                       HloModule::CreateFromProto(module_proto, *module_config));
 
-  TF_RETURN_IF_ERROR(MaybeDumpHloModule(*module));
+  TF_RETURN_IF_ERROR(MaybeDumpUnoptimizedHloModule(*module));
 
   TF_ASSIGN_OR_RETURN(
       module, backend->compiler()->RunHloPasses(std::move(module), executor,
@@ -1160,7 +1160,7 @@ StatusOr> Service::Replicas(
   return replicas;
 }
 
-Status Service::MaybeDumpHloModule(const HloModule& module) const {
+Status Service::MaybeDumpUnoptimizedHloModule(const HloModule& module) const {
   const string xla_dump_unoptimized_hlo_proto_to =
       module.config().debug_options().xla_dump_unoptimized_hlo_proto_to();
   if (xla_dump_unoptimized_hlo_proto_to.empty()) {
@@ -1168,7 +1168,8 @@ Status Service::MaybeDumpHloModule(const HloModule& module) const {
   }
   HloProto proto = MakeHloProto(module);
   return protobuf_util::DumpProtoToDirectory(
-      proto, xla_dump_unoptimized_hlo_proto_to, module.name());
+      proto, xla_dump_unoptimized_hlo_proto_to,
+      StrCat(module.name(), ".unoptimized"));
 }
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 44c5248b15..1f62fad4c8 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -271,7 +271,9 @@ class Service : public ServiceInterface {
   StatusOr> Replicas(
       const Backend& backend, const DeviceHandle& device_handle) const;
 
-  Status MaybeDumpHloModule(const HloModule& module) const;
+  // Dumps the (unoptimized) module given if the corresponding DebugOptions
+  // field has been set.
+  Status MaybeDumpUnoptimizedHloModule(const HloModule& module) const;
 
   // Returns the device handle that represents the replicated device for a
   // single computation that is not model-parallelized.
-- 
GitLab


From 6dd278831a62be829ce6f15039e5b6b368b3727c Mon Sep 17 00:00:00 2001
From: Sung Jin Hwang 
Date: Thu, 13 Sep 2018 19:44:28 -0700
Subject: [PATCH 0184/1357] Added Pyclif binding rule for config.proto.

PiperOrigin-RevId: 212920113
---
 tensorflow/core/BUILD | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 8f32bc2844..1a86bff5cd 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1920,6 +1920,13 @@ tf_pyclif_proto_library(
     visibility = ["//visibility:public"],
 )
 
+tf_pyclif_proto_library(
+    name = "protobuf/config_pyclif",
+    proto_lib = ":protos_all_cc",
+    proto_srcfile = "protobuf/config.proto",
+    visibility = ["//visibility:public"],
+)
+
 tf_pyclif_proto_library(
     name = "protobuf/device_properties_pyclif",
     proto_lib = ":protos_all_cc",
-- 
GitLab


From 4a665550dacdb5e162e71b4afe039de178ffc49f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Thu, 13 Sep 2018 20:08:15 -0700
Subject: [PATCH 0185/1357] Add missing #include to mkl_layout_pass.cc.

PiperOrigin-RevId: 212921868
---
 tensorflow/core/graph/mkl_layout_pass.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 2e644fe987..f5b0105862 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/common_runtime/optimization_registry.h"
 #include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/node_builder.h"
-- 
GitLab


From 40010e6287980f63158807aef163276ed1cce272 Mon Sep 17 00:00:00 2001
From: Yunxing Dai 
Date: Thu, 13 Sep 2018 20:36:01 -0700
Subject: [PATCH 0186/1357] [GraphCompiler] Remove the use of XLA context as
 argument.

- XlaContext is never used in the class, so remove it from the member list.
- State more clearly in the comment that the result is written to the context obtained from the compilation device.

PiperOrigin-RevId: 212924213
---
 tensorflow/compiler/tf2xla/graph_compiler.h | 13 ++++++-------
 tensorflow/compiler/tf2xla/xla_compiler.cc  |  3 +--
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/graph_compiler.h b/tensorflow/compiler/tf2xla/graph_compiler.h
index ab7cac7100..e9f02201cf 100644
--- a/tensorflow/compiler/tf2xla/graph_compiler.h
+++ b/tensorflow/compiler/tf2xla/graph_compiler.h
@@ -55,17 +55,17 @@ namespace tensorflow {
 // op registration infrastructure instead of FunctionLibraryRuntime.
 class GraphCompiler {
  public:
-  GraphCompiler(XlaContext* xla_context, XlaCompilationDevice* device,
-                Graph* graph, FunctionLibraryRuntime* flib,
+  GraphCompiler(XlaCompilationDevice* device, Graph* graph,
+                FunctionLibraryRuntime* flib,
                 ScopedStepContainer* step_container)
-      : xla_context_(xla_context),
-        device_(device),
+      : device_(device),
         graph_(graph),
         flib_(flib),
         step_container_(step_container) {}
 
-  // Compiles the graph. The results are written in `xla_context` that is passed
-  // into the compiler.
+  // Compiles the graph. The results are written in xla_context stored in the
+  // resource_manager of the 'XlaCompilationDevice' that's passed into the
+  // constructor.
   Status Compile();
 
  private:
@@ -82,7 +82,6 @@ class GraphCompiler {
   // using `compiler_`.
   Status CompileFunctionalNode(Node* n, OpKernelContext* op_context);
 
-  XlaContext* xla_context_;
   XlaCompilationDevice* device_;
   Graph* graph_;
   FunctionLibraryRuntime* flib_;
diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc
index 105f3b61d5..739e47778a 100644
--- a/tensorflow/compiler/tf2xla/xla_compiler.cc
+++ b/tensorflow/compiler/tf2xla/xla_compiler.cc
@@ -325,8 +325,7 @@ Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr<Graph> graph,
       step_container->name(), XlaContext::kXlaContextResourceName,
       xla_context));
 
-  GraphCompiler graph_compiler(xla_context, device, graph.get(), flib,
-                               step_container.get());
+  GraphCompiler graph_compiler(device, graph.get(), flib, step_container.get());
   TF_RETURN_IF_ERROR(graph_compiler.Compile());
   // Explicitly clean up the step container, to capture the cleanup status.
   step_container.reset();
-- 
GitLab


From 30e176f584d80898ebad00d2a2ff226e6c281c50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 
Date: Fri, 14 Sep 2018 11:32:37 +0800
Subject: [PATCH 0187/1357] CLN: only assert gains >= 0 for normalization

---
 tensorflow/python/estimator/canned/boosted_trees.py  |  8 ++++----
 .../python/estimator/canned/boosted_trees_test.py    | 12 ++++++++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py
index 812c892363..7c04ff7970 100644
--- a/tensorflow/python/estimator/canned/boosted_trees.py
+++ b/tensorflow/python/estimator/canned/boosted_trees.py
@@ -1036,8 +1036,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
     feature_importances: A list of corresponding feature importances.
 
   Raises:
-    AssertionError: If feature importances contain negative value.
-      Or if normalize = True and normalization is not possible
+    AssertionError: When normalize = True, if feature importances
+      contain a negative value, or if normalization is not possible
       (e.g. ensemble is empty or trees contain only a root node).
   """
   tree_importances = [_compute_feature_importances_per_tree(tree, num_features)
@@ -1045,9 +1045,9 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize):
   tree_importances = np.array(tree_importances)
   tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1)
   feature_importances = np.sum(tree_importances * tree_weights, axis=0)
-  assert np.all(feature_importances >= 0), ('feature_importances '
-                                            'must be non-negative.')
   if normalize:
+    assert np.all(feature_importances >= 0), ('feature_importances '
+                                              'must be non-negative.')
     normalizer = np.sum(feature_importances)
     assert normalizer > 0, 'Trees are all empty or contain only a root node.'
     feature_importances /= normalizer
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py
index 1ce4f7d765..3158ccca81 100644
--- a/tensorflow/python/estimator/canned/boosted_trees_test.py
+++ b/tensorflow/python/estimator/canned/boosted_trees_test.py
@@ -949,8 +949,16 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase):
     self._create_fake_checkpoint_with_tree_ensemble_proto(
         est, tree_ensemble_text)
 
-    with self.assertRaisesRegexp(AssertionError, 'non-negative'):
-      est.experimental_feature_importances(normalize=False)
+    # Github #21509 (nataliaponomareva):
+    # The gains stored in the splits can be negative
+    # if people are using complexity regularization.
+    feature_names_expected = ['f_2_bucketized',
+                              'f_0_bucketized',
+                              'f_1_bucketized']
+    feature_names, importances = est.experimental_feature_importances(
+        normalize=False)
+    self.assertAllEqual(feature_names_expected, feature_names)
+    self.assertAllClose([0.0, 0.0, -5.0], importances)
 
     with self.assertRaisesRegexp(AssertionError, 'non-negative'):
       est.experimental_feature_importances(normalize=True)
-- 
GitLab


From 9fcf40afede43c09243d06ba420ac44249067872 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?=
 
Date: Fri, 14 Sep 2018 13:44:30 +0800
Subject: [PATCH 0188/1357] CLN: remove unused import

---
 tensorflow/python/keras/layers/advanced_activations.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 61ab69c16f..6922d3ec1e 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.keras import activations
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import constraints
 from tensorflow.python.keras import initializers
-- 
GitLab


From b43aeb053ec440ea5205a09c229339c10a962af4 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy 
Date: Thu, 13 Sep 2018 23:25:24 -0700
Subject: [PATCH 0189/1357] Automated rollback of commit
 eb5cd6926ef8d2a5a748f1aa978e51148e22dd97

PiperOrigin-RevId: 212936412
---
 tensorflow/python/keras/backend.py                    |  3 ---
 tensorflow/python/keras/backend_test.py               |  3 +--
 .../python/keras/layers/advanced_activations.py       | 11 ++++++-----
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 6f766c6257..529b07dc12 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3462,9 +3462,6 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   clip_max = max_value is not None
 
   if alpha != 0.:
-    if max_value is None and threshold == 0:
-      return nn.leaky_relu(x, alpha=alpha)
-
     if threshold != 0:
       negative_part = nn.relu(-x + threshold)
     else:
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index ab71589940..2f271c4f50 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -522,9 +522,8 @@ class BackendLinearAlgebraTest(test.TestCase):
       relu_op = keras.backend.relu(x)
       self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 7]])
 
-      # alpha (leaky relu used)
+      # alpha
       relu_op = keras.backend.relu(x, alpha=0.5)
-      self.assertTrue('LeakyRelu' in relu_op.name)
       self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]])
 
       # max_value < some elements
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 731d180a80..61ab69c16f 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -268,7 +268,7 @@ class Softmax(Layer):
     self.axis = axis
 
   def call(self, inputs):
-    return K.softmax(inputs, axis=self.axis)
+    return activations.softmax(inputs, axis=self.axis)
 
   def get_config(self):
     config = {'axis': self.axis}
@@ -322,10 +322,11 @@ class ReLU(Layer):
   def call(self, inputs):
     # alpha is used for leaky relu slope in activations instead of
     # negative_slope.
-    return K.relu(inputs,
-                  alpha=self.negative_slope,
-                  max_value=self.max_value,
-                  threshold=self.threshold)
+    return activations.relu(
+        inputs,
+        alpha=self.negative_slope,
+        max_value=self.max_value,
+        threshold=self.threshold)
 
   def get_config(self):
     config = {
-- 
GitLab


From 3a2276ced02b217596080fb34654d2dce5069f81 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer 
Date: Fri, 14 Sep 2018 01:24:52 -0700
Subject: [PATCH 0190/1357] [XLA:TF] Make FloorDiv not crash on unsigned types

FloorDiv (which corresponds to the // operator in python) supports uint8 and
uint16 (but not uint32) in TF. Using xla::Abs on unsigned types throws an error,
but the rounding logic is trivial for unsigned types so just do a plain Div.

This isn't tested yet because we don't have any targets supporting uint8 or
uint16 yet.

PiperOrigin-RevId: 212946132
---
 tensorflow/compiler/tf2xla/kernels/binary_ops.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index df17da4c1c..0d9a768a6f 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -66,6 +66,9 @@ XLA_MAKE_BINARY(Complex, xla::Complex(lhs, rhs, extend_dimensions));
 static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x,
                                xla::XlaOp y, const BCast& broadcast_helper) {
   std::tie(x, y) = XlaBinaryOp::Broadcast(b, x, y, broadcast_helper);
+  if (DataTypeIsUnsigned(dtype)) {
+    return xla::Div(x, y);
+  }
   auto zero = XlaHelpers::Zero(b, dtype);
   auto one = XlaHelpers::One(b, dtype);
   auto different_sign = xla::Ne(xla::Lt(x, zero), xla::Lt(y, zero));
-- 
GitLab


From e9f5df6d48eb0999281d73b85ce4a126fcfaab98 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Fri, 14 Sep 2018 02:03:32 -0700
Subject: [PATCH 0191/1357] compat: Update forward compatibility horizon to
 2018-09-14

PiperOrigin-RevId: 212949973
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 1a1ed04e0d..8a100fe975 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 13)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 14)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From c335f3ae6872715c4873eb8af3ff2e42833bc6c0 Mon Sep 17 00:00:00 2001
From: James Keeling 
Date: Fri, 14 Sep 2018 02:26:55 -0700
Subject: [PATCH 0192/1357] [Grappler] s/std::string/string/

string and std::string are not necessarily the same thing in TF, but this code assumed that they are.

PiperOrigin-RevId: 212952877
---
 tensorflow/core/grappler/costs/graph_properties.cc        | 8 ++++----
 tensorflow/core/grappler/costs/utils.cc                   | 8 ++++----
 tensorflow/core/grappler/costs/utils.h                    | 2 +-
 tensorflow/core/grappler/costs/virtual_scheduler_test.cc  | 8 ++++----
 tensorflow/core/grappler/inputs/utils.cc                  | 7 ++++---
 tensorflow/core/grappler/inputs/utils.h                   | 4 ++--
 tensorflow/core/grappler/op_types.cc                      | 2 +-
 .../core/grappler/optimizers/arithmetic_optimizer_test.cc | 4 ++--
 tensorflow/core/grappler/optimizers/data/graph_utils.cc   | 2 +-
 9 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index d273eddf81..56c8339d57 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -260,13 +260,13 @@ typename DisjointSet<Handle>::Rep* DisjointSet<Handle>::Find(Handle value) {
 }
 
 bool IsEnqueue(const NodeDef& n) {
-  return (n.op().find("Enqueue") != std::string::npos &&
-          n.op().find("EnqueueMany") == std::string::npos);
+  return (n.op().find("Enqueue") != string::npos &&
+          n.op().find("EnqueueMany") == string::npos);
 }
 
 bool IsDequeue(const NodeDef& n) {
-  return (n.op().find("Dequeue") != std::string::npos &&
-          n.op().find("DequeueMany") == std::string::npos);
+  return (n.op().find("Dequeue") != string::npos &&
+          n.op().find("DequeueMany") == string::npos);
 }
 
 bool HasAnyUnknownDimensions(const TensorShapeProto& proto) {
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index aad00ce039..83434ea40f 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -127,7 +127,7 @@ static void ExtractExtraProperties(
 
       // For filename input, the file size can also be useful.
       if (op_def && i < op_def->input_arg_size() &&
-          op_def->input_arg(i).name().find("filename") != std::string::npos) {
+          op_def->input_arg(i).name().find("filename") != string::npos) {
         Tensor tensor;
         if (!tensor.FromProto(t)) {
           continue;
@@ -153,7 +153,7 @@ static void ExtractExtraProperties(
     // When the input is a handle (e.g. look up table handle), the information
     // in the op itself is not sufficient to predict the op memory.
     if (op_def && i < op_def->input_arg_size() &&
-        op_def->input_arg(i).name().find("handle") != std::string::npos) {
+        op_def->input_arg(i).name().find("handle") != string::npos) {
       string new_key = strings::StrCat("parent_", i, "_op");
       AttrValue attr;
       attr.set_s(input_node->op());
@@ -320,8 +320,8 @@ void TensorSizeHistogram::Merge(const TensorSizeHistogram& src) {
                  buckets_.begin(), std::plus<uint64>());
 }
 
-std::string TensorSizeHistogram::ToString() const {
-  std::string r;
+string TensorSizeHistogram::ToString() const {
+  string r;
   char buf[200];
   snprintf(buf, sizeof(buf), "Count: %lld, Average: ", num_elem_);
   r.append(buf);
diff --git a/tensorflow/core/grappler/costs/utils.h b/tensorflow/core/grappler/costs/utils.h
index d2c7c67666..5fd6717712 100644
--- a/tensorflow/core/grappler/costs/utils.h
+++ b/tensorflow/core/grappler/costs/utils.h
@@ -80,7 +80,7 @@ class TensorSizeHistogram {
   uint64 Max() const { return max_; }
   uint64 NumElem() const { return num_elem_; }
   uint64 SumElem() const { return sum_elem_; }
-  std::string ToString() const;
+  string ToString() const;
 
  protected:
   const int Index(const uint64 value) const;
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index 02a379fca8..80889afc86 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -1999,13 +1999,13 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) {
 
   // Helper lambda to extract port num from _Send and _Recv op name.
   auto get_port_num = [](const string& name) -> int {
-    if (name.find("bn_0") != std::string::npos) {
+    if (name.find("bn_0") != string::npos) {
       return 0;
-    } else if (name.find("bn_1") != std::string::npos) {
+    } else if (name.find("bn_1") != string::npos) {
       return 1;
-    } else if (name.find("bn_2") != std::string::npos) {
+    } else if (name.find("bn_2") != string::npos) {
       return 2;
-    } else if (name.find("bn_minus1") != std::string::npos) {
+    } else if (name.find("bn_minus1") != string::npos) {
       return -1;
     }
     return -999;
diff --git a/tensorflow/core/grappler/inputs/utils.cc b/tensorflow/core/grappler/inputs/utils.cc
index 5029dff877..def9198a69 100644
--- a/tensorflow/core/grappler/inputs/utils.cc
+++ b/tensorflow/core/grappler/inputs/utils.cc
@@ -14,10 +14,11 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/inputs/utils.h"
-#include "tensorflow/core/platform/env.h"
 
 #include <vector>
 
+#include "tensorflow/core/platform/env.h"
+
 namespace tensorflow {
 namespace grappler {
 
@@ -29,12 +30,12 @@ bool FilesExist(const std::set<string>& files) {
   return FilesExist(std::vector<string>(files.begin(), files.end()), nullptr);
 }
 
-bool FileExists(const std::string& file, Status* status) {
+bool FileExists(const string& file, Status* status) {
   *status = Env::Default()->FileExists(file);
   return status->ok();
 }
 
-Status ReadGraphDefFromFile(const std::string& graph_def_pbtxt_path,
+Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path,
                             GraphDef* result) {
   Status status;
   if (FileExists(graph_def_pbtxt_path, &status)) {
diff --git a/tensorflow/core/grappler/inputs/utils.h b/tensorflow/core/grappler/inputs/utils.h
index 627dd5359f..4b9cb0a9ad 100644
--- a/tensorflow/core/grappler/inputs/utils.h
+++ b/tensorflow/core/grappler/inputs/utils.h
@@ -29,9 +29,9 @@ bool FilesExist(const std::vector<string>& files,
                 std::vector<Status>* status = nullptr);
 bool FilesExist(const std::set<string>& files);
 
-bool FileExists(const std::string& file, Status* status);
+bool FileExists(const string& file, Status* status);
 
-Status ReadGraphDefFromFile(const std::string& graph_def_pbtxt_path,
+Status ReadGraphDefFromFile(const string& graph_def_pbtxt_path,
                             GraphDef* result);
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index e78239bd43..3521669b63 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -491,7 +491,7 @@ bool IsFreeOfSideEffect(const NodeDef& node) {
     }
   }
   // Queue ops modify the queue which is a side effect.
-  if (node.op().find("Queue") != std::string::npos) {
+  if (node.op().find("Queue") != string::npos) {
     return false;
   }
   return !ModifiesInputsInPlace(node);
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 39517edc06..bc838c6659 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -581,7 +581,7 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimple) {
   const NodeDef* new_const = node_map.GetNode(optimized_const_name);
   ASSERT_NE(new_const, nullptr);
   EXPECT_EQ("^x", new_const->input(0));
-  EXPECT_EQ(std::string("\0\0\0@", 4),
+  EXPECT_EQ(string("\0\0\0@", 4),
             new_const->attr().at("value").tensor().tensor_content());
 
   const NodeDef* new_mul = node_map.GetNode(optimized_mul_name);
@@ -625,7 +625,7 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsSimpleWithControlDep) {
   const NodeDef* new_const = node_map.GetNode(optimized_const_name);
   ASSERT_NE(new_const, nullptr);
   EXPECT_EQ("^x", new_const->input(0));
-  EXPECT_EQ(std::string("\0\0\0@", 4),
+  EXPECT_EQ(string("\0\0\0@", 4),
             new_const->attr().at("value").tensor().tensor_content());
 
   const NodeDef* new_mul = node_map.GetNode(optimized_mul_name);
diff --git a/tensorflow/core/grappler/optimizers/data/graph_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
index 5a7fe19265..d4ab444036 100644
--- a/tensorflow/core/grappler/optimizers/data/graph_utils.cc
+++ b/tensorflow/core/grappler/optimizers/data/graph_utils.cc
@@ -273,7 +273,7 @@ void SetUniqueGraphNodeName(StringPiece prefix, GraphDef* graph,
   string name = string(prefix);
   int id = graph->node_size();
   while (ContainsGraphNodeWithName(name, *graph)) {
-    if (name.rfind("_generated") != std::string::npos &&
+    if (name.rfind("_generated") != string::npos &&
         (name.rfind("_generated") == (name.size() - strlen("_generated")))) {
       name.insert(name.rfind("_generated"), strings::StrCat("/_", id));
     } else {
-- 
GitLab


From 54cbee5d034af8693aa39cc5877c3dfcd62d3740 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Fri, 14 Sep 2018 02:30:05 -0700
Subject: [PATCH 0193/1357] [TF:XLA] Split XLA Concat Ops that fail on large
 sets of inputs.

Make the test large to prevent occasional timeouts on CPU. This should normally complete in well under a minute.

PiperOrigin-RevId: 212953337
---
 tensorflow/compiler/tests/BUILD               |  3 +-
 tensorflow/compiler/tests/concat_ops_test.py  | 35 +++++++++++++++++++
 .../compiler/tf2xla/kernels/concat_op.cc      | 33 ++++++++++++++++-
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 2176eaebe4..97ed554171 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -277,9 +277,10 @@ tf_xla_py_test(
     ],
 )
 
+# This test is marked large because testConcatLargeNumberOfTensors occasionally runs long on CPU.
 tf_xla_py_test(
     name = "concat_ops_test",
-    size = "medium",
+    size = "large",
     srcs = ["concat_ops_test.py"],
     deps = [
         ":xla_test",
diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py
index 37e5318bb5..2d225ad226 100644
--- a/tensorflow/compiler/tests/concat_ops_test.py
+++ b/tensorflow/compiler/tests/concat_ops_test.py
@@ -291,6 +291,41 @@ class ConcatTest(xla_test.XLATestCase):
             ValueError, r"Can't concatenate scalars \(use tf\.stack instead\)"):
           array_ops.concat([scalar, scalar, scalar], dim)
 
+  # The purpose of this is to ensure that XLA on GPU will not run out of memory
+  # with too many arguments.
+  def testConcatLargeNumberOfTensors(self):
+    with self.cached_session():
+      with self.test_scope():
+        for concat_dim in range(2):
+          params = {}
+          p = []
+          shape = np.array([7, 13])
+          num_tensors = 1001
+          for i in np.arange(num_tensors):
+            input_shape = shape
+            placeholder = array_ops.placeholder(
+                dtypes.float32, shape=input_shape)
+            p.append(placeholder)
+            params[placeholder] = np.random.rand(*input_shape).astype(
+                np.float32)
+
+          concat_inputs = p
+          c = array_ops.concat(concat_inputs, concat_dim)
+          result = c.eval(feed_dict=params)
+
+          self.assertEqual(result.shape, c.get_shape())
+          cur_offset = 0
+
+          for i in np.arange(num_tensors):
+            # The index into the result is the ':' along all dimensions
+            # except the concat_dim. slice(0, size) is used for ':', and
+            # a list of slices is used to index into result.
+            index = [slice(0, params[p[i]].shape[j]) for j in np.arange(2)]
+            index[concat_dim] = slice(
+                cur_offset, cur_offset + params[p[i]].shape[concat_dim])
+            cur_offset += params[p[i]].shape[concat_dim]
+            self.assertAllEqual(result[index], params[p[i]])
+
 
 class ConcatOffsetTest(xla_test.XLATestCase):
 
diff --git a/tensorflow/compiler/tf2xla/kernels/concat_op.cc b/tensorflow/compiler/tf2xla/kernels/concat_op.cc
index f410605104..0ae23aa6df 100644
--- a/tensorflow/compiler/tf2xla/kernels/concat_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/concat_op.cc
@@ -37,6 +37,16 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+// Used to determine the number of Tensors allowed in a Concat op to prevent
+// going over the max gpu parameter memory size. This is an issue because concat
+// is variadic and can have an unlimited number of arguments when called.
+// Concat ops with more Tensors than this will be split into multiple concat
+// ops.
+//
+// TODO(b/112613927): Remove the logic here and put it properly in an HLO pass
+// along with boxing large numbers of parameters.
+constexpr int64 kMaxConcatArgsPerOp = 500;
+
 // --------------------------------------------------------------------------
 class ConcatBaseOp : public XlaOpKernel {
  public:
@@ -74,6 +84,7 @@ class ConcatBaseOp : public XlaOpKernel {
     // Make a vector holding the XlaOp for each of the inputs that has non-zero
     // elements.
     std::vector<xla::XlaOp> input_data;
+    std::vector<xla::XlaOp> partial_concats;
     int output_concat_dim = 0;
     const bool input_is_scalar = IsLegacyScalar(input_shape);
     for (int i = 0; i < N; ++i) {
@@ -94,10 +105,30 @@ class ConcatBaseOp : public XlaOpKernel {
         input_data.push_back(handle);
       }
       output_concat_dim += in_shape.dims() > 0 ? in_shape.dim_size(axis) : 1;
+
+      // Concat is associative, so it can be split into many operations when too
+      // many arguments are in a single op. This is a temporary workaround for
+      // b/112613927 where too many parameters in an XlaLaunchOp later result in
+      // too many parameters to a single GPU kernel.
+      if (i && i % kMaxConcatArgsPerOp == 0) {
+        partial_concats.push_back(
+            xla::ConcatInDim(ctx->builder(), input_data, axis));
+        input_data.clear();
+      }
     }
+    // Add any inputs that have not been put into another concat yet.
+    partial_concats.insert(partial_concats.end(), input_data.begin(),
+                           input_data.end());
 
     VLOG(1) << "Concat dim " << concat_dim << " equivalent to " << axis;
-    ctx->SetOutput(0, xla::ConcatInDim(ctx->builder(), input_data, axis));
+    // Don't add an additional "identity" concatenate for better readability of
+    // IR.
+    if (partial_concats.size() == 1) {
+      ctx->SetOutput(0, partial_concats.front());
+    } else {
+      ctx->SetOutput(0,
+                     xla::ConcatInDim(ctx->builder(), partial_concats, axis));
+    }
   }
 
  private:
-- 
GitLab


From 3c283b598f2de0376dfaf63ed50c4625abbf6e03 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer 
Date: Fri, 14 Sep 2018 06:23:56 -0700
Subject: [PATCH 0194/1357] Run buildifier on build_defs.bzl

PiperOrigin-RevId: 212972521
---
 tensorflow/compiler/tests/build_defs.bzl | 165 ++++++++++++-----------
 1 file changed, 87 insertions(+), 78 deletions(-)

diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl
index 7b114d4f85..a76f136736 100644
--- a/tensorflow/compiler/tests/build_defs.bzl
+++ b/tensorflow/compiler/tests/build_defs.bzl
@@ -4,88 +4,97 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_is_configured")
 load("//tensorflow/compiler/tests:plugin.bzl", "plugins")
 
 def all_backends():
-  b = ["cpu"] + plugins.keys()
-  if cuda_is_configured():
-    return b + ["gpu"]
-  else:
-    return b
+    b = ["cpu"] + plugins.keys()
+    if cuda_is_configured():
+        return b + ["gpu"]
+    else:
+        return b
 
-def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None,
-                   disabled_backends=None, **kwargs):
-  """Generates py_test targets, one per XLA backend.
+def tf_xla_py_test(
+        name,
+        srcs = [],
+        deps = [],
+        tags = [],
+        data = [],
+        main = None,
+        disabled_backends = None,
+        **kwargs):
+    """Generates py_test targets, one per XLA backend.
 
-  This rule generates py_test() targets named name_backend, for each backend
-  in all_backends(). The rule also generates a test suite with named `name` that
-  tests all backends for the test.
+    This rule generates py_test() targets named name_backend, for each backend
+    in all_backends(). The rule also generates a test suite with named `name` that
+    tests all backends for the test.
 
-  For example, the following rule generates test cases foo_test_cpu,
-  foo_test_gpu, and a test suite name foo_test that tests both.
-  tf_xla_py_test(
-      name="foo_test",
-      srcs="foo_test.py",
-      deps=[...],
-  )
+    For example, the following rule generates test cases foo_test_cpu,
+    foo_test_gpu, and a test suite name foo_test that tests both.
+    tf_xla_py_test(
+        name="foo_test",
+        srcs="foo_test.py",
+        deps=[...],
+    )
 
-  Args:
-    name: Name of the target.
-    srcs: Sources for the target.
-    deps: Dependencies of the target.
-    tags: Tags to apply to the generated targets.
-    data: Data dependencies of the target.
-    main: Same as py_test's main attribute.
-    disabled_backends: A list of backends that should not be tested. Supported
-      values include "cpu" and "gpu". If not specified, defaults to None.
-    **kwargs: keyword arguments passed onto the generated py_test() rules.
-  """
-  if disabled_backends == None:
-    disabled_backends = []
+    Args:
+      name: Name of the target.
+      srcs: Sources for the target.
+      deps: Dependencies of the target.
+      tags: Tags to apply to the generated targets.
+      data: Data dependencies of the target.
+      main: Same as py_test's main attribute.
+      disabled_backends: A list of backends that should not be tested. Supported
+        values include "cpu" and "gpu". If not specified, defaults to None.
+      **kwargs: keyword arguments passed onto the generated py_test() rules.
+    """
+    if disabled_backends == None:
+        disabled_backends = []
 
-  enabled_backends = [b for b in all_backends() if b not in disabled_backends]
-  test_names = []
-  for backend in enabled_backends:
-    test_name = "{}_{}".format(name, backend)
-    backend_tags = ["tf_xla_{}".format(backend)]
-    backend_args = []
-    backend_deps = []
-    backend_data = []
-    if backend == "cpu":
-      backend_args += [
-          "--test_device=XLA_CPU",
-          "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64"
-      ]
-    elif backend == "gpu":
-      backend_args += [
-          "--test_device=XLA_GPU",
-          "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16"
-      ]
-      backend_tags += ["requires-gpu-sm35"]
-    elif backend in plugins:
-      backend_args += ["--test_device=" + plugins[backend]["device"],
-                       "--types=" + plugins[backend]["types"]]
-      backend_tags += plugins[backend]["tags"]
-      backend_args += plugins[backend]["args"]
-      backend_deps += plugins[backend]["deps"]
-      backend_data += plugins[backend]["data"]
-    else:
-      fail("Unknown backend {}".format(backend))
+    enabled_backends = [b for b in all_backends() if b not in disabled_backends]
+    test_names = []
+    for backend in enabled_backends:
+        test_name = "{}_{}".format(name, backend)
+        backend_tags = ["tf_xla_{}".format(backend)]
+        backend_args = []
+        backend_deps = []
+        backend_data = []
+        if backend == "cpu":
+            backend_args += [
+                "--test_device=XLA_CPU",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64",
+            ]
+        elif backend == "gpu":
+            backend_args += [
+                "--test_device=XLA_GPU",
+                "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16",
+            ]
+            backend_tags += ["requires-gpu-sm35"]
+        elif backend in plugins:
+            backend_args += [
+                "--test_device=" + plugins[backend]["device"],
+                "--types=" + plugins[backend]["types"],
+            ]
+            backend_tags += plugins[backend]["tags"]
+            backend_args += plugins[backend]["args"]
+            backend_deps += plugins[backend]["deps"]
+            backend_data += plugins[backend]["data"]
+        else:
+            fail("Unknown backend {}".format(backend))
 
-    native.py_test(
-        name=test_name,
-        srcs=srcs,
-        srcs_version="PY2AND3",
-        args=backend_args,
-        main="{}.py".format(name) if main == None else main,
-        data=data + backend_data,
-        deps=deps + backend_deps,
-        tags=tags + backend_tags,
-        **kwargs
-    )
-    test_names.append(test_name)
-  native.test_suite(name=name, tests=test_names)
+        native.py_test(
+            name = test_name,
+            srcs = srcs,
+            srcs_version = "PY2AND3",
+            args = backend_args,
+            main = "{}.py".format(name) if main == None else main,
+            data = data + backend_data,
+            deps = deps + backend_deps,
+            tags = tags + backend_tags,
+            **kwargs
+        )
+        test_names.append(test_name)
+    native.test_suite(name = name, tests = test_names)
 
-def generate_backend_suites(backends=[]):
-  """Generates per-backend test_suites that run all tests for a backend."""
-  if not backends:
-    backends = all_backends()
-  for backend in backends:
-    native.test_suite(name="%s_tests" % backend, tags=["tf_xla_%s" % backend])
+def generate_backend_suites(backends = []):
+    """Generates per-backend test_suites that run all tests for a backend."""
+    if not backends:
+        backends = all_backends()
+    for backend in backends:
+        native.test_suite(name = "%s_tests" % backend, tags = ["tf_xla_%s" % backend])
-- 
GitLab


From 6aebb0866718cae2c921e875f3fd74573ee9acc8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Fri, 14 Sep 2018 08:29:15 -0700
Subject: [PATCH 0195/1357] global_step/sec renamed to global_steps/sec

PiperOrigin-RevId: 212986442
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 1ff04f5c26..23c54511ca 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1774,18 +1774,19 @@ class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
         summary_writer=summary_writer)
 
   def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
-    global_step_per_sec = elapsed_steps / elapsed_time
-    examples_per_sec = self._batch_size * global_step_per_sec
+    global_steps_per_sec = elapsed_steps / elapsed_time
+    examples_per_sec = self._batch_size * global_steps_per_sec
     if self._summary_writer is not None:
       global_step_summary = Summary(value=[
-          Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec)
+          Summary.Value(tag='global_steps/sec',
+                        simple_value=global_steps_per_sec)
       ])
       example_summary = Summary(value=[
           Summary.Value(tag='examples/sec', simple_value=examples_per_sec)
       ])
       self._summary_writer.add_summary(global_step_summary, global_step)
       self._summary_writer.add_summary(example_summary, global_step)
-    logging.info('global_step/sec: %g', global_step_per_sec)
+    logging.info('global_steps/sec: %g', global_steps_per_sec)
     logging.info('examples/sec: %g', examples_per_sec)
 
 
-- 
GitLab


From 85b0ec839b6954fc7d3f396406e8797cc984d3cc Mon Sep 17 00:00:00 2001
From: Lasse Espeholt 
Date: Fri, 14 Sep 2018 08:53:44 -0700
Subject: [PATCH 0196/1357] Fix bug preventing one from not specifying
 additional function attributes.

PiperOrigin-RevId: 212989480
---
 tensorflow/python/eager/function.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 552ed29f65..962e334b27 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -529,7 +529,7 @@ class Function(object):
     self._num_outputs = len(self._func_graph.outputs)
     self._output_shapes = tuple(
         output.shape for output in self._func_graph.outputs)
-    self._attrs = _parse_func_attrs(attrs)
+    self._attrs = _parse_func_attrs(attrs or {})
     self._device_functions = tuple(
         self._func_graph._device_functions_outer_to_inner)  # pylint: disable=protected-access
 
-- 
GitLab


From 95338704198205c1bdec1e344e103f1daf05df68 Mon Sep 17 00:00:00 2001
From: Nupur Garg 
Date: Fri, 14 Sep 2018 09:04:42 -0700
Subject: [PATCH 0197/1357] Internal change.

PiperOrigin-RevId: 212991181
---
 tensorflow/contrib/lite/build_def.bzl | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl
index 5c705ea53b..52b994ee92 100644
--- a/tensorflow/contrib/lite/build_def.bzl
+++ b/tensorflow/contrib/lite/build_def.bzl
@@ -337,11 +337,7 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs):
         flags = "--ignore_toco_errors --run_with_extended"
         kwargs["tags"].append("skip_already_failing")
         kwargs["tags"].append("no_oss")
-
-        # TODO(b/115504899): Re-enable asan, msan and tsan tests.
-        kwargs["tags"].append("noasan")
-        kwargs["tags"].append("nomsan")
-        kwargs["tags"].append("notsan")
+        kwargs["tags"].append("notap")
 
     gen_zipped_test_file(
         name = "zip_%s" % test_name,
-- 
GitLab


From 7210ca23ce19e54aa3cbc21ff72e5e5d4189dfea Mon Sep 17 00:00:00 2001
From: avijit-nervana 
Date: Fri, 14 Sep 2018 09:55:23 -0700
Subject: [PATCH 0198/1357] Fixed the missing license file caught by
 do_pip_package_licenses_check test.

---
 tensorflow/tools/pip_package/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 50515b04a9..31a3712de8 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -210,6 +210,7 @@ filegroup(
         "@ngraph//:LICENSE",
         "@ngraph_tf//:LICENSE",
         "@nlohmann_json_lib//:LICENSE.MIT",
+        "@tbb//:LICENSE",
     ]) + tf_additional_license_deps(),
 )
 
-- 
GitLab


From 82e4edc50fb146dbf006cd81aaac6d01f40533a6 Mon Sep 17 00:00:00 2001
From: Raghuraman Krishnamoorthi 
Date: Fri, 14 Sep 2018 09:51:21 -0700
Subject: [PATCH 0199/1357]  Update description of contrib.quantize

PiperOrigin-RevId: 212997520
---
 tensorflow/contrib/quantize/README.md | 158 ++++++++++++++++++++------
 1 file changed, 124 insertions(+), 34 deletions(-)

diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md
index 27a933c0f9..3f1e7d2792 100644
--- a/tensorflow/contrib/quantize/README.md
+++ b/tensorflow/contrib/quantize/README.md
@@ -1,65 +1,155 @@
-# Quantized Training Rewrites
+# Quantization-aware training
 
-tf.contrib.quantize provides tools for transforming graphs to include ops to
-model quantization of weights, biases and activations during both training and
-inference. The details of the transformation implemented in this package is
-described here [1].
+Quantization-aware model training ensures that the forward pass matches precision
+for both training and inference. There are two aspects to this:
 
-This is done using the
-[fake quantization op](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization).
+* Operator fusion at inference time is accurately modeled at training time.
+* Quantization effects at inference are modeled at training time.
 
-Literature has shown that fixed point networks provide comparable performance to
-floating point networks [2]. This is achieved by modeling the quantization
-operation during training in both the forward and backward passes.
-The fake quantization operator achieves this by modeling the quantizer as a pass
-through estimator [3]. Note that during back propagation, the parameters are
+For efficient inference, TensorFlow combines batch normalization with the preceding
+convolutional and fully-connected layers prior to quantization by
+[folding batch norm layers](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/quantize/python/fold_batch_norms.py){:.external}. 
+
+The quantization error is modeled using [fake quantization](../api_guides/python/array_ops.md#Fake_quantization)
+nodes to simulate the effect of quantization in the forward and backward passes. The
+forward-pass models quantization, while the backward-pass models quantization as a
+straight-through estimator. Both the forward- and backward-pass simulate the quantization
+of weights and activations. Note that during back propagation, the parameters are
 updated at high precision as this is needed to ensure sufficient precision in
-accumulating tiny adjustments to the parameters. However, for the forward pass,
-the parameters and activations are quantized to the desired lower precision.
+accumulating tiny adjustments to the parameters.
+
 
-## How to use the Rewrites
+Additionally, the minimum and maximum values for activations are determined
+during training. This allows a model trained with quantization in the loop to be
+converted to a fixed point inference model with little effort, eliminating the
+need for a separate calibration step.
 
-tf.contrib.quantize provides two rewrites, one to train for quantization and
-one to create a [TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/)
-compatible eval graph.
+Since it's difficult to add these fake quantization operations to all the
+required locations in the model, there's a function available that rewrites the
+training graph. To create a fake quantized training graph:
 
 ```
 # Build forward pass of model.
-…
 loss = tf.losses.get_total_loss()
 
-# Call the training rewrite which rewrites the graph in-place with FakeQuantization nodes
-# and folds batchnorm for training.
-# It is often needed to finetune a floating point model for quantization with this training tool.
-# When training from scratch, quant_delay can be used to activate quantization after
-# training to convergence with the float graph, effectively finetuning the model.
-tf.contrib.quantize.create_training_graph(quant_delay=2000000)
+# Call the training rewrite which rewrites the graph in-place with
+# FakeQuantization nodes and folds batchnorm for training. It is
+# often needed to fine-tune a floating point model for quantization
+# with this training tool. When training from scratch, quant_delay
+# can be used to activate quantization after training to convergence
+# with the float graph, effectively fine-tuning the model.
+g = tf.get_default_graph()
+tf.contrib.quantize.create_training_graph(input_graph=g,
+                                          quant_delay=2000000)
 
 # Call backward pass optimizer as usual.
 optimizer = tf.train.GradientDescentOptimizer(learning_rate)
 optimizer.minimize(loss)
 ```
 
-Additionally, the rewritten eval graph is non-trivially different from the
-training graph due the effects of quantization on batch normalization. Thus,
-we offer a separate rewrite for the eval_graph.
+The rewritten *eval graph* is non-trivially different from the *training graph*
+since the quantization ops affect the batch normalization step. Because of this,
+we've added a separate rewrite for the *eval graph*:
 
 ```
 # Build eval model
-…
-logits = tf.nn.softmax_cross_entropy_with_logits(...)
+logits = tf.nn.softmax_cross_entropy_with_logits_v2(...)
 
-# Call the eval rewrite which rewrites the graph in-place with FakeQuantization nodes
-# and fold batchnorm for eval.
-tf.contrib.quantize.create_eval_graph()
+# Call the eval rewrite which rewrites the graph in-place with
+# FakeQuantization nodes and folds batchnorm for eval.
+g = tf.get_default_graph()
+tf.contrib.quantize.create_eval_graph(input_graph=g)
 
-# Save the checkpoint and eval graph proto to disk for freezing and providing to TFLite.
+# Save the checkpoint and eval graph proto to disk for freezing
+# and providing to TFLite.
 with open(eval_graph_file, ‘w’) as f:
   f.write(str(g.as_graph_def()))
 saver = tf.train.Saver()
 saver.save(sess, checkpoint_name)
 ```
 
+Methods to rewrite the training and eval graphs are an active area of research
+and experimentation. Although rewrites and quantized training might not work or
+improve performance for all models, we are working to generalize these techniques.
+
+
+## Generating fully-quantized models
+
+The previously demonstrated after-rewrite eval graph only *simulates*
+quantization. To generate real fixed-point computations from a trained
+quantization model, convert it to a fixed-point kernel. TensorFlow Lite supports
+this conversion from the graph resulting from `create_eval_graph`.
+
+First, create a frozen graph that will be the input for the TensorFlow Lite
+toolchain:
+
+```
+freeze_graph \
+  --input_graph=eval_graph_def.pb \
+  --input_checkpoint=checkpoint \
+  --output_graph=frozen_eval_graph.pb --output_node_names=outputs
+```
+
+Provide this to the TensorFlow Lite Optimizing Converter (TOCO) to get a
+fully-quantized TensorFlow Lite model:
+
+```
+toco \
+  --input_file=frozen_eval_graph.pb \
+  --output_file=tflite_model.tflite \
+  --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \
+  --inference_type=QUANTIZED_UINT8 \
+  --input_shape="1,224, 224,3" \
+  --input_array=input \
+  --output_array=outputs \
+  --std_value=127.5 --mean_value=127.5
+```
+
+See the documentation for `tf.contrib.quantize` and [TensorFlow Lite](../mobile/tflite/).
+
+
+## Quantized accuracy results
+
+The following are results of training some popular CNN models (Mobilenet-v1,
+Mobilenet-v2, and Inception-v3) using this tool:
+
+
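+<figure>
+  <table>
+    <tr>
+      <th>Model</th>
+      <th>Top-1 Accuracy:<br>Floating point</th>
+      <th>Top-1 Accuracy:<br>Fixed point: 8 bit weights and activations</th>
+    </tr>
+    <tr><td>Mobilenet-v1-128-0.25</td><td>0.415</td><td>0.399</td></tr>
+    <tr><td>Mobilenet-v1-128-0.5</td><td>0.563</td><td>0.549</td></tr>
+    <tr><td>Mobilenet-v1-128-0.75</td><td>0.621</td><td>0.598</td></tr>
+    <tr><td>Mobilenet-v1-128-1</td><td>0.652</td><td>0.64</td></tr>
+    <tr><td>Mobilenet-v1-160-0.25</td><td>0.455</td><td>0.435</td></tr>
+    <tr><td>Mobilenet-v1-160-0.5</td><td>0.591</td><td>0.577</td></tr>
+    <tr><td>Mobilenet-v1-160-0.75</td><td>0.653</td><td>0.639</td></tr>
+    <tr><td>Mobilenet-v1-160-1</td><td>0.68</td><td>0.673</td></tr>
+    <tr><td>Mobilenet-v1-192-0.25</td><td>0.477</td><td>0.458</td></tr>
+    <tr><td>Mobilenet-v1-192-0.5</td><td>0.617</td><td>0.604</td></tr>
+    <tr><td>Mobilenet-v1-192-0.75</td><td>0.672</td><td>0.662</td></tr>
+    <tr><td>Mobilenet-v1-192-1</td><td>0.7</td><td>0.69</td></tr>
+    <tr><td>Mobilenet-v1-224-0.25</td><td>0.498</td><td>0.482</td></tr>
+    <tr><td>Mobilenet-v1-224-0.5</td><td>0.633</td><td>0.622</td></tr>
+    <tr><td>Mobilenet-v1-224-0.75</td><td>0.684</td><td>0.679</td></tr>
+    <tr><td>Mobilenet-v1-224-1</td><td>0.709</td><td>0.697</td></tr>
+    <tr><td>Mobilenet-v2-224-1</td><td>0.718</td><td>0.708</td></tr>
+    <tr><td>Inception_v3</td><td>0.78</td><td>0.775</td></tr>
+  </table>
+  <figcaption>
+    <b>Table 1</b>: Top-1 accuracy of floating point and fully quantized CNNs on Imagenet Validation dataset.
+  </figcaption>
+</figure>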
+ +Our pre-trained models are available in the +TensorFlow Lite model repository. The code used to generate +these models is available. + + + These rewrites are an active area of research and experimentation, so the rewrites and quantized training will likely not work across all models, though we hope to work towards generalizing these techniques. -- GitLab From c7458c97a5f752a2ae79da4cba04ced0dbcb76df Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Fri, 14 Sep 2018 10:26:11 -0700 Subject: [PATCH 0200/1357] Export tf.keras.sparse_categorical_accuracy. Copied from PR #21790. closes #21790, fixes #21735 PiperOrigin-RevId: 213003724 --- tensorflow/python/keras/metrics.py | 1 + tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt | 4 ++++ tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 473d8cd95b..fd3c39cf2e 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -615,6 +615,7 @@ def categorical_accuracy(y_true, y_pred): K.floatx()) +@tf_export('keras.metrics.sparse_categorical_accuracy') def sparse_categorical_accuracy(y_true, y_pred): y_true = math_ops.reduce_max(y_true, axis=-1) y_pred = math_ops.argmax(y_pred, axis=-1) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt index 73b577da37..a296e13158 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt @@ -104,6 +104,10 @@ tf_module { name: "serialize" argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "sparse_categorical_accuracy" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "sparse_categorical_crossentropy" argspec: "args=[\'y_true\', 
\'y_pred\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt index 73b577da37..a296e13158 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt @@ -104,6 +104,10 @@ tf_module { name: "serialize" argspec: "args=[\'metric\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "sparse_categorical_accuracy" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "sparse_categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" -- GitLab From 61743287362feb358dfe63cffd1e232f01ca2ab0 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 14 Sep 2018 10:44:45 -0700 Subject: [PATCH 0201/1357] Make tf.gradients() correctly handle captured EagerTensors. tf.gradients() can't be used to take the gradient of eager computations, but it should handle ops that take captured eager tensors as input as long as the gradient computation doesn't depend on that input. This change makes sure the gradient algorithm doesn't try to access the op, etc. of EagerTensors. PiperOrigin-RevId: 213007155 --- tensorflow/python/ops/gradients_impl.py | 50 +++++++++++++++---------- tensorflow/python/ops/gradients_test.py | 18 +++++++++ 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 196161c661..056015d6b6 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -184,7 +184,7 @@ def _PendingCount(to_ops, from_ops, colocate_gradients_with_ops, func_graphs, between_op_list.append(op) # Clear the boolean so we won't add the inputs again. 
reached_ops.remove(op) - for inp in _Inputs(op, xs): + for inp in _NonEagerInputs(op, xs): queue.append(inp.op) # X in between_ops iff X is on a path of zero or more backpropagatable tensors # between from_ops and to_ops @@ -196,7 +196,7 @@ def _PendingCount(to_ops, from_ops, colocate_gradients_with_ops, func_graphs, # Initialize pending count for between ops. pending_count = collections.defaultdict(int) for op in between_op_list: - for x in _Inputs(op, xs): + for x in _NonEagerInputs(op, xs): if x.op in between_ops: pending_count[x.op] += 1 @@ -347,7 +347,7 @@ def _StopOps(from_ops, stop_gradient_ops, pending_count, xs): stop_ops = set() for op in from_ops: is_stop_op = True - for inp in _Inputs(op, xs): + for inp in _NonEagerInputs(op, xs): if pending_count[inp.op] > 0: is_stop_op = False break @@ -371,10 +371,10 @@ def _IsPartitionedCall(op): return op.type == "PartitionedCall" or op.type == "StatefulPartitionedCall" -def _SymGrad(op, out_grads, xs): +def _SymGrad(op, out_grads): """Backprop through a function call node op given its outputs' gradients.""" - f_in = [x for x in _Inputs(op, xs)] + out_grads - f_types = [x.dtype for x in _Inputs(op, xs)] + f_in = [x for x in op.inputs] + out_grads + f_types = [x.dtype for x in op.inputs] f = attr_value_pb2.NameAttrList() if _IsPartitionedCall(op): f.name = op.get_attr("f").name @@ -441,7 +441,7 @@ def _RaiseNoGradWrtInitialLoopValError(op, from_ops, xs): if curr_op in from_ops: target_op = curr_op break - queue.extend(t.op for t in _Inputs(curr_op, xs)) + queue.extend(t.op for t in _NonEagerInputs(curr_op, xs)) assert target_op raise ValueError( "Cannot compute gradient inside while loop with respect to op '%s'. " @@ -474,7 +474,8 @@ def _MaybeCaptured(t): A tensor, potentially from a different Graph/_function.FuncGraph. 
""" # pylint: disable=protected-access - if _IsFunction(t.op.graph) and t.op.type == "Placeholder": + if (not isinstance(t, ops.EagerTensor) and + _IsFunction(t.op.graph) and t.op.type == "Placeholder"): for input_t, placeholder_t in _Captures(t.op.graph).items(): if t == placeholder_t: return _MaybeCaptured(input_t) @@ -484,9 +485,12 @@ def _MaybeCaptured(t): # TODO(skyewm): plumbing xs through everywhere is ugly, consider making # _GradientsHelper a class with xs as a member variable. -def _Inputs(op, xs): +def _NonEagerInputs(op, xs): """Returns the inputs of op, crossing closure boundaries where necessary. + Does not return any captured EagerTensors, i.e., the number of tensors + returned may be less than than the actual number of inputs. + Args: op: Operation xs: list of Tensors we are differentiating w.r.t. @@ -497,12 +501,19 @@ def _Inputs(op, xs): captured inputs. """ if _IsFunction(op.graph): # pylint: disable=protected-access - # If we're differentiating w.r.t. `t`, do not attempt to traverse through it - # to a captured value. The algorithm needs to "see" `t` in this case, even - # if it's a function input for a captured value, whereas usually we'd like - # to traverse through these closures as if the captured value was the direct - # input to op. - return [t if (t in xs) else _MaybeCaptured(t) for t in op.inputs] + inputs = [] + for t in op.inputs: + # If we're differentiating w.r.t. `t`, do not attempt to traverse through + # it to a captured value. The algorithm needs to "see" `t` in this case, + # even if it's a function input for a captured value, whereas usually we'd + # like to traverse through these closures as if the captured value was the + # direct input to op. + if t not in xs: + t = _MaybeCaptured(t) + # Skip captured eager inputs. 
+ if isinstance(t, ops.EagerTensor): continue + inputs.append(t) + return inputs else: return op.inputs @@ -805,7 +816,7 @@ def _GradientsHelper(ys, # For function call ops, we add a 'SymbolicGradient' # node to the graph to compute gradients. in_grads = _MaybeCompile(grad_scope, op, func_call, - lambda: _SymGrad(op, out_grads, xs)) + lambda: _SymGrad(op, out_grads)) in_grads = _AsList(in_grads) _VerifyGeneratedGradients(in_grads, op) if gate_gradients and len([x for x in in_grads @@ -820,8 +831,9 @@ def _GradientsHelper(ys, else: # If no grad_fn is defined or none of out_grads is available, # just propagate a list of None backwards. - in_grads = [None] * len(_Inputs(op, xs)) - for i, (t_in, in_grad) in enumerate(zip(_Inputs(op, xs), in_grads)): + in_grads = [None] * len(_NonEagerInputs(op, xs)) + for i, (t_in, in_grad) in enumerate(zip(_NonEagerInputs(op, xs), + in_grads)): if in_grad is not None: if (isinstance(in_grad, ops.Tensor) and t_in.dtype != dtypes.resource): @@ -862,7 +874,7 @@ def _HasAnyNotNoneGrads(grads, op): def _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state, xs): """Update pending count for the inputs of op and enqueue ready ops.""" - for x in _Inputs(op, xs): + for x in _NonEagerInputs(op, xs): pending_count[x.op] -= 1 ready = (pending_count[x.op] == 0) if loop_state and not ready: diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 6243be6c9e..4f6e5dc473 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -531,6 +531,24 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase): with self.cached_session() as sess: self.assertEqual(sess.run(z_grad), 3.0) + def testCapturedEagerTensors(self): + # Test that we can handle captured eager tensors unrelated to the gradient + # computation (i.e. we need to ignore them). 
+ # TODO(skyewm): make it an error if you try to take the gradient wrt a + # captured EagerTensor + with context.eager_mode(): + c = constant_op.constant(2.0, name="c") + + @function.defun + def Foo(): + x = constant_op.constant(10.0, name="x") + y = math_ops.multiply(x, c, name="y") + z = math_ops.multiply(y, 3.0, name="z") + g = gradients_impl.gradients(z, x) + return g[0] + + self.assertEqual(Foo().numpy(), 6.0) + class StopGradientTest(test_util.TensorFlowTestCase): -- GitLab From d035a83459330c87bbc527e3d480b65f32841997 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 14 Sep 2018 10:46:12 -0700 Subject: [PATCH 0202/1357] Fix archive path PiperOrigin-RevId: 213007422 --- .../contrib/lite/tutorials/post_training_quant.ipynb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb index a96e2c4e1b..4929133bda 100644 --- a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb +++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb @@ -608,7 +608,8 @@ "outputs": [], "source": [ "archive_path = tf.keras.utils.get_file(\"resnet_v2_101.tgz\", \"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz\", extract=True)\n", - "archive_path = pathlib.Path(archive_path)" + "archive_path = pathlib.Path(archive_path)\n", + "archive_dir = str(archive_path.parent)" ] }, { @@ -631,7 +632,7 @@ }, "outputs": [], "source": [ - "! cat {archive_path}/resnet_v2_101_299_info.txt" + "! cat {archive_dir}/resnet_v2_101_299_info.txt" ] }, { @@ -664,8 +665,8 @@ }, "outputs": [], "source": [ - "archive_dir = str(archive_path.parent)\n", - "!ls -lh {archive_dir}" + "\n", + "!ls -lh {archive_dir}/*.tflite" ] }, { -- GitLab From 52d7ed1a133cb1c3a2e13532bf97beef19c1516d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 14 Sep 2018 10:48:50 -0700 Subject: [PATCH 0203/1357] Convert more kernel signatures to use runtime shapes. PiperOrigin-RevId: 213007905 --- .../internal/optimized/optimized_ops.h | 193 ++++++++++++++--- .../internal/reference/reference_ops.h | 197 +++++++++++++++--- .../contrib/lite/kernels/internal/types.h | 4 +- 3 files changed, 328 insertions(+), 66 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 659a65a8ea..464207d739 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -4431,9 +4431,9 @@ inline void LocalResponseNormalization( } } -inline void Softmax(const float* input_data, const RuntimeShape& input_shape, - float beta, float* output_data, - const RuntimeShape& output_shape) { +inline void Softmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { gemmlowp::ScopedProfilingLabel label("Softmax"); MatchingFlatSize(input_shape, output_shape); @@ -4441,7 +4441,8 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape, auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); // Compute the exponential first, removing the max coefficient for numerical // stability. - out_mat = (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * beta; + out_mat = + (in_mat.rowwise() - in_mat.colwise().maxCoeff()).array() * params.beta; // We are separating out the exp function so that exp can be vectorized. out_mat = out_mat.array().exp(); // Normalize to get the activations. 
@@ -4450,10 +4451,22 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape, out_mat.array().rowwise() *= scale; } -inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_beta_multiplier, int32 input_beta_left_shift, - int diff_min, uint8* output_data, +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Softmax(const float* input_data, const RuntimeShape& input_shape, + float beta, float* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + params.beta = beta; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void Softmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const uint8* input_data, + const RuntimeShape& output_shape, uint8* output_data) { + const int32 input_beta_multiplier = params.input_multiplier; + const int32 input_beta_left_shift = params.input_left_shift; + const int diff_min = params.diff_min; // The representation chosen for the input to the exp() function is Q5.26. // We need to leave extra space since values that we skip might be as large as // -32 before multiplying by input_beta_multiplier, and therefore as large as @@ -4659,10 +4672,24 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_beta_multiplier; + params.input_left_shift = input_beta_left_shift; + params.diff_min = diff_min; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + // TODO(myenik): This is the same as the reference implementation, not actually // optimized yet. 
-inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { +inline void LogSoftmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { gemmlowp::ScopedProfilingLabel label("LogSoftmax"); const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = @@ -4695,6 +4722,15 @@ inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy +inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, + float* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + // No params currently used for float LogSoftmax. + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + template inline gemmlowp::FixedPoint log_x_for_x_greater_than_or_equal_to_1_impl( @@ -4809,12 +4845,15 @@ log_x_for_x_greater_than_or_equal_to_1( } // Currently just a copy of the reference code. 
-inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_multiplier, int32 input_left_shift, - int32 reverse_scaling_divisor, - int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const RuntimeShape& output_shape) { +inline void LogSoftmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const uint8* input_data, + const RuntimeShape& output_shape, uint8* output_data) { gemmlowp::ScopedProfilingLabel label("LogSoftmax/Uint8"); + const int32 input_multiplier = params.input_multiplier; + const int32 input_left_shift = params.input_left_shift; + const int32 reverse_scaling_divisor = params.reverse_scaling_divisor; + const int32 reverse_scaling_right_shift = params.reverse_scaling_right_shift; + const int diff_min = params.diff_min; // The representation chosen for the input to the exp() function is Q5.26. // We need to leave extra space since values that we skip might be as large as // -32 before multiplying by input_beta_multiplier, and therefore as large as @@ -4896,7 +4935,24 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const RuntimeShape& input_shape, const float* input_data, +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_multiplier, int32 input_left_shift, + int32 reverse_scaling_divisor, + int32 reverse_scaling_right_shift, int diff_min, + uint8* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + params.reverse_scaling_divisor = reverse_scaling_divisor; + params.reverse_scaling_right_shift = reverse_scaling_right_shift; + params.diff_min = diff_min; + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { gemmlowp::ScopedProfilingLabel label("Logistic"); auto input_map = MapAsVector(input_data, input_shape); @@ -4905,11 +4961,23 @@ inline void Logistic(const RuntimeShape& input_shape, const float* input_data, input_map.array().unaryExpr(Eigen::internal::scalar_sigmoid_op()); } -inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Logistic(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + LogisticParams params; + // No params currently needed by float Logistic. 
+ Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const uint8* input_data, + const RuntimeShape& output_shape, uint8* output_data) { gemmlowp::ScopedProfilingLabel label("Logistic/Uint8"); + const int32 input_zero_point = params.input_zero_point; + const int32 input_range_radius = params.input_range_radius; + const int32 input_multiplier = params.input_multiplier; + const int input_left_shift = params.input_left_shift; const int size = MatchingFlatSize(input_shape, output_shape); int c = 0; @@ -5042,7 +5110,22 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + LogisticParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const int16* input_data, const RuntimeShape& output_shape, int16* output_data) { gemmlowp::ScopedProfilingLabel label("Logistic/Int16"); const int flat_size = MatchingFlatSize(input_shape, output_shape); @@ -5102,26 +5185,51 @@ inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy version. 
+inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, + const RuntimeShape& output_shape, int16* output_data) { + LogisticParams params; + // No params currently needed by int16 Logistic. + Logistic(params, input_shape, input_data, output_shape, output_data); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy version. inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, int16* output_data, const RuntimeShape& output_shape) { - Logistic(input_shape, input_data, output_shape, output_data); + LogisticParams params; + // No params currently needed by int16 Logistic. + Logistic(params, input_shape, input_data, output_shape, output_data); } -inline void Tanh(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& output_shape, + float* output_data) { gemmlowp::ScopedProfilingLabel label("Tanh"); auto input_map = MapAsVector(input_data, input_shape); auto output_map = MapAsVector(output_data, output_shape); output_map.array() = input_map.array().tanh(); } -inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Tanh(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + TanhParams params; + // Currently no params needed for float Tanh. 
+ Tanh(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& output_shape, + uint8* output_data) { // Note that this is almost the exact same code as in Logistic(). gemmlowp::ScopedProfilingLabel label("Tanh"); + const int32 input_zero_point = params.input_zero_point; + const int32 input_range_radius = params.input_range_radius; + const int32 input_multiplier = params.input_multiplier; + const int input_left_shift = params.input_left_shift; const int size = MatchingFlatSize(input_shape, output_shape); int c = 0; @@ -5263,10 +5371,25 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, - int input_left_shift, int16* output_data, - const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + TanhParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const int16* input_data, const RuntimeShape& output_shape, + int16* output_data) { gemmlowp::ScopedProfilingLabel label("Tanh/Int16"); + const int input_left_shift = params.input_left_shift; // Support for shifts is limited until we have a parameterized version of // SaturatingRoundingMultiplyByPOT(). 
TFLITE_DCHECK_GE(input_left_shift, 0); @@ -5363,6 +5486,16 @@ inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, + int input_left_shift, int16* output_data, + const RuntimeShape& output_shape) { + TanhParams params; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + template inline void Cast(const RuntimeShape& input_shape, const SrcT* input_data, const RuntimeShape& output_shape, DstT* output_data) { diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 66f18ec195..111adbf5b3 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -2902,9 +2902,9 @@ inline void LocalResponseNormalization( } } -inline void Softmax(const float* input_data, const RuntimeShape& input_shape, - float beta, float* output_data, - const RuntimeShape& output_shape) { +inline void Softmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); @@ -2923,21 +2923,33 @@ inline void Softmax(const float* input_data, const RuntimeShape& input_shape, // Compute sum. float sum = 0.f; for (int c = 0; c < depth; ++c) { - sum += std::exp((input_data[i * depth + c] - max) * beta); + sum += std::exp((input_data[i * depth + c] - max) * params.beta); } // Compute result. 
for (int c = 0; c < depth; ++c) { output_data[i * depth + c] = - std::exp((input_data[i * depth + c] - max) * beta) / sum; + std::exp((input_data[i * depth + c] - max) * params.beta) / sum; } } } -inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_beta_multiplier, int32 input_beta_left_shift, - int diff_min, uint8* output_data, +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Softmax(const float* input_data, const RuntimeShape& input_shape, + float beta, float* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + params.beta = beta; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void Softmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const uint8* input_data, + const RuntimeShape& output_shape, uint8* output_data) { + const int32 input_beta_multiplier = params.input_multiplier; + const int32 input_beta_left_shift = params.input_left_shift; + const int diff_min = params.diff_min; // The representation chosen for the input to the exp() function is Q5.26. // We need to leave extra space since values that we skip might be as large as // -32 before multiplying by input_beta_multiplier, and therefore as large as @@ -3015,8 +3027,22 @@ inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. 
+// Legacy +inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_beta_multiplier; + params.input_left_shift = input_beta_left_shift; + params.diff_min = diff_min; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void LogSoftmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); @@ -3046,6 +3072,15 @@ inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy +inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, + float* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + // No params currently used for float LogSoftmax. + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + // Although currently the name of this function says that it cannot handle // values less than 1, in practice it can handle as low as 1/x_max, where // x_max is the largest representable input. 
In other words, the output range @@ -3161,16 +3196,19 @@ log_x_for_x_greater_than_or_equal_to_1( input_val); } -inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_multiplier, int32 input_left_shift, - int32 reverse_scaling_divisor, - int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const RuntimeShape& output_shape) { +inline void LogSoftmax(const SoftmaxParams& params, + const RuntimeShape& input_shape, const uint8* input_data, + const RuntimeShape& output_shape, uint8* output_data) { + const int32 input_multiplier = params.input_multiplier; + const int32 input_left_shift = params.input_left_shift; + const int32 reverse_scaling_divisor = params.reverse_scaling_divisor; + const int32 reverse_scaling_right_shift = params.reverse_scaling_right_shift; + const int diff_min = params.diff_min; // The representation chosen for the input to the exp() function is Q5.26. - // We need to leave extra space since values that we skip might be as large as - // -32 before multiplying by input_beta_multiplier, and therefore as large as - // -16 afterwards. Note that exp(-8) is definitely not insignificant to - // accumulation, but exp(-16) definitely is. + // We need to leave extra space since values that we skip might be as large + // as -32 before multiplying by input_beta_multiplier, and therefore as + // large as -16 afterwards. Note that exp(-8) is definitely not + // insignificant to accumulation, but exp(-16) definitely is. static constexpr int kScaledDiffIntegerBits = 5; static constexpr int kAccumulationIntegerBits = 12; static constexpr int kOutputIntegerBits = 4; @@ -3247,7 +3285,24 @@ inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const RuntimeShape& input_shape, const float* input_data, +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_multiplier, int32 input_left_shift, + int32 reverse_scaling_divisor, + int32 reverse_scaling_right_shift, int diff_min, + uint8* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + params.reverse_scaling_divisor = reverse_scaling_divisor; + params.reverse_scaling_right_shift = reverse_scaling_right_shift; + params.diff_min = diff_min; + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); @@ -3258,10 +3313,22 @@ inline void Logistic(const RuntimeShape& input_shape, const float* input_data, } } -inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Logistic(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + LogisticParams params; + // No params currently needed by float Logistic. 
+ Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const uint8* input_data, + const RuntimeShape& output_shape, uint8* output_data) { + const int32 input_zero_point = params.input_zero_point; + const int32 input_range_radius = params.input_range_radius; + const int32 input_multiplier = params.input_multiplier; + const int input_left_shift = params.input_left_shift; const int flat_size = MatchingFlatSize(input_shape, output_shape); for (int i = 0; i < flat_size; i++) { @@ -3296,7 +3363,22 @@ inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + LogisticParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const LogisticParams& params, + const RuntimeShape& input_shape, const int16* input_data, const RuntimeShape& output_shape, int16* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); @@ -3314,8 +3396,18 @@ inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, } } -inline void Tanh(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, + const RuntimeShape& output_shape, int16* output_data) { + LogisticParams params; + // No params currently needed by int16 Logistic. + Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& output_shape, + float* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); for (int i = 0; i < flat_size; i++) { @@ -3325,10 +3417,22 @@ inline void Tanh(const RuntimeShape& input_shape, const float* input_data, } } -inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Tanh(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + TanhParams params; + // Currently no params needed for float Tanh. 
+ Tanh(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& output_shape, + uint8* output_data) { + const int32 input_zero_point = params.input_zero_point; + const int32 input_range_radius = params.input_range_radius; + const int32 input_multiplier = params.input_multiplier; + const int input_left_shift = params.input_left_shift; const int32 output_zero_point = 128; const int flat_size = MatchingFlatSize(input_shape, output_shape); @@ -3365,9 +3469,24 @@ inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, } } -inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, - int input_left_shift, int16* output_data, - const RuntimeShape& output_shape) { +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + TanhParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, + const int16* input_data, const RuntimeShape& output_shape, + int16* output_data) { + const int input_left_shift = params.input_left_shift; // Support for shifts is limited until we have a parameterized version of // SaturatingRoundingMultiplyByPOT(). TFLITE_DCHECK_GE(input_left_shift, 0); @@ -3398,6 +3517,16 @@ inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, + int input_left_shift, int16* output_data, + const RuntimeShape& output_shape) { + TanhParams params; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + inline void Dequantize(const tflite::DequantizationParams& op_params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, float* output_data) { diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index 023707d466..87e8ff0346 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -885,8 +885,8 @@ struct SoftmaxParams { // for LogSoftmax. double beta; // uint8 inference params. Used even when beta defaults to 1.0. - int32 input_beta_multiplier; - int32 input_beta_left_shift; + int32 input_multiplier; + int32 input_left_shift; // Reverse scaling is only used by LogSoftmax. 
int32 reverse_scaling_divisor; int32 reverse_scaling_right_shift; -- GitLab From b2cb6e27f42cd8db6b105e686b494afe9b76324d Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Fri, 14 Sep 2018 10:50:07 -0700 Subject: [PATCH 0204/1357] Point VectorDiffeomixture to tf.linalg rather than tf.contrib.linalg PiperOrigin-RevId: 213008118 --- .../contrib/distributions/python/ops/vector_diffeomixture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index ece03fe4aa..3c8aae2797 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -23,7 +23,6 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import AffineLinearOperator from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered -from tensorflow.contrib.linalg.python.ops import linear_operator_addition as linop_add_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -36,6 +35,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops.distributions import categorical as categorical_lib from tensorflow.python.ops.distributions import distribution as distribution_lib from tensorflow.python.ops.distributions import normal as normal_lib +from tensorflow.python.ops.linalg import linear_operator_addition as linop_add_lib from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib from tensorflow.python.ops.linalg import linear_operator_full_matrix as linop_full_lib from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib -- GitLab From 
81a063287a0449cfe2f20a82c036146d6e9356f9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 10:53:23 -0700 Subject: [PATCH 0205/1357] Removed unnecessary includes from stringpiece.h PiperOrigin-RevId: 213008707 --- tensorflow/core/lib/core/status.h | 1 + tensorflow/core/lib/core/stringpiece.h | 6 ------ tensorflow/core/lib/io/block_builder.h | 1 + tensorflow/core/lib/io/path.h | 1 + tensorflow/core/lib/monitoring/collection_registry.h | 1 + tensorflow/core/lib/monitoring/metric_def.h | 1 + tensorflow/core/lib/png/png_io.h | 1 + tensorflow/core/util/tensor_bundle/naming.h | 1 + 8 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h index 49f74ff47f..eb0ff555a5 100644 --- a/tensorflow/core/lib/core/status.h +++ b/tensorflow/core/lib/core/status.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index e7b17c9b36..6edff139ae 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -26,13 +26,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_CORE_STRINGPIECE_H_ #define TENSORFLOW_CORE_LIB_CORE_STRINGPIECE_H_ -#include -#include -#include -#include -#include #include "absl/strings/string_view.h" -#include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/lib/io/block_builder.h b/tensorflow/core/lib/io/block_builder.h index e2927689d2..117b6a0bb8 100644 --- a/tensorflow/core/lib/io/block_builder.h +++ b/tensorflow/core/lib/io/block_builder.h @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace table { diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h index e3649fd0c9..38fb0c5d86 100644 --- a/tensorflow/core/lib/io/path.h +++ b/tensorflow/core/lib/io/path.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_LIB_IO_PATH_H_ #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace io { diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h index c204d52cfe..9e4e1989dd 100644 --- a/tensorflow/core/lib/monitoring/collection_registry.h +++ b/tensorflow/core/lib/monitoring/collection_registry.h @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace monitoring { diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h index 756e5c2af8..bc4365e439 100644 --- a/tensorflow/core/lib/monitoring/metric_def.h +++ b/tensorflow/core/lib/monitoring/metric_def.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace monitoring { diff --git a/tensorflow/core/lib/png/png_io.h b/tensorflow/core/lib/png/png_io.h index bb5d20fb68..c876c5156a 100644 --- a/tensorflow/core/lib/png/png_io.h +++ b/tensorflow/core/lib/png/png_io.h @@ -37,6 +37,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/png.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace png { diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h index 6539d565e2..7b101971a8 100644 --- a/tensorflow/core/util/tensor_bundle/naming.h +++ b/tensorflow/core/util/tensor_bundle/naming.h @@ -35,6 +35,7 @@ limitations under the License. #define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_NAMING_H_ #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { -- GitLab From 825098107c1d7b63d3a7b29c094ddc5dbff7cad2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 11:03:36 -0700 Subject: [PATCH 0206/1357] parallel_for: add a bunch of converters for cwise ops and gradients. PiperOrigin-RevId: 213010458 --- .../ops/parallel_for/control_flow_ops_test.py | 192 ++++++++++++++++-- tensorflow/python/ops/parallel_for/pfor.py | 98 ++++++++- 2 files changed, 261 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py index d403b0c61a..6e276dee55 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py @@ -31,6 +31,8 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops +from tensorflow.python.ops import bitwise_ops +from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gradients as gradient_ops @@ -300,28 +302,129 @@ class ArrayTest(PForTest): self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.float32] * 2) +class 
BitwiseTest(PForTest): + + def test_unary_cwise(self): + for op in [bitwise_ops.invert]: + x = random_ops.random_uniform([7, 3, 5], maxval=10, dtype=dtypes.int32) + + # pylint: disable=cell-var-from-loop + def loop_fn(i): + x1 = array_ops.gather(x, i) + return op(x1) + # pylint: enable=cell-var-from-loop + + self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.int32]) + + def test_binary_cwise(self): + binary_ops = [ + bitwise_ops.bitwise_and, + bitwise_ops.bitwise_or, + bitwise_ops.bitwise_xor, + bitwise_ops.left_shift, + bitwise_ops.right_shift, + ] + for op in binary_ops: + x = random_ops.random_uniform([7, 3, 5], maxval=10, dtype=dtypes.int32) + y = random_ops.random_uniform([3, 5], maxval=10, dtype=dtypes.int32) + + output_dtypes = [] + # pylint: disable=cell-var-from-loop + def loop_fn(i): + x1 = array_ops.gather(x, i) + y1 = array_ops.gather(y, i) + outputs = [op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)] + del output_dtypes[:] + output_dtypes.extend([t.dtype for t in outputs]) + return outputs + # pylint: enable=cell-var-from-loop + self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=output_dtypes) + + class MathTest(PForTest): def test_unary_cwise_ops(self): - for op in [ - math_ops.tanh, nn.relu, math_ops.sigmoid, math_ops.negative, - math_ops.square - ]: + complex_ops = [ + math_ops.angle, + math_ops.imag, + math_ops.complex_abs, + math_ops.real, + math_ops.conj, + ] + real_ops = [ + lambda x: math_ops.acosh(1 + math_ops.square(x)), + math_ops.abs, + math_ops.acos, + math_ops.asin, + math_ops.asinh, + math_ops.atan, + math_ops.atanh, + math_ops.bessel_i0e, + math_ops.bessel_i1e, + math_ops.cos, + math_ops.cosh, + math_ops.digamma, + math_ops.erf, + math_ops.erfc, + math_ops.exp, + math_ops.expm1, + math_ops.inv, + math_ops.is_finite, + math_ops.is_inf, + math_ops.lgamma, + math_ops.log, + math_ops.log1p, + math_ops.neg, + math_ops.negative, + math_ops.reciprocal, + math_ops.rint, + math_ops.round, + math_ops.rsqrt, + math_ops.sigmoid, + 
math_ops.sign, + math_ops.sin, + math_ops.sinh, + math_ops.sqrt, + math_ops.square, + math_ops.tan, + math_ops.tanh, + math_ops.tanh, + nn.elu, + nn.relu, + nn.relu6, + nn.selu, + nn.softplus, + nn.softsign, + ] + for op in complex_ops + real_ops: x = random_ops.random_uniform([3, 5]) + if op in complex_ops: + y = random_ops.random_uniform([3, 5]) + x = math_ops.complex(x, y) # pylint: disable=cell-var-from-loop + output_dtypes = [] def loop_fn(i): x1 = array_ops.gather(x, i) - y = op(x1) - loss = math_ops.reduce_sum(y * y) - return op(x), y, gradient_ops.gradients(loss, x1) + y1 = op(x1) + outputs = [op(x), y1] + if y1.dtype == dtypes.float32: + loss = math_ops.reduce_sum(y1 * y1) + grad = gradient_ops.gradients(loss, x1) + if grad and grad[0] is not None: + outputs.extend(grad) + del output_dtypes[:] + output_dtypes.extend([t.dtype for t in outputs]) + return outputs # pylint: enable=cell-var-from-loop - self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.float32] * 3) + self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=output_dtypes) def test_unary_cwise_no_grad(self): - for op in [math_ops.ceil, math_ops.floor, math_ops.logical_not]: + for op in [math_ops.ceil, + math_ops.floor, + math_ops.logical_not]: x = random_ops.random_uniform([3, 5]) if op == math_ops.logical_not: x = x > 0 @@ -336,33 +439,80 @@ class MathTest(PForTest): def test_binary_cwise_ops(self): logical_ops = [ - math_ops.logical_and, math_ops.logical_or, math_ops.logical_xor - ] - bool_ops = [ - math_ops.less, math_ops.less_equal, math_ops.greater, - math_ops.greater_equal, math_ops.equal, math_ops.not_equal + math_ops.logical_and, + math_ops.logical_or, + math_ops.logical_xor ] + + # Wrapper functions restricting the range of inputs of zeta and polygamma. 
+ def safe_polygamma(x, y): + return math_ops.polygamma( + math_ops.round(clip_ops.clip_by_value(y, 1, 10)), + x * x + 1) + + def safe_zeta(x, y): + return math_ops.zeta(x * x + 1, y * y) + float_ops = [ - math_ops.add, math_ops.subtract, math_ops.multiply, math_ops.divide, - math_ops.maximum, math_ops.minimum + math_ops.add, + math_ops.add_v2, + math_ops.atan2, + math_ops.complex, + math_ops.div, + math_ops.divide, + math_ops.div_no_nan, + math_ops.equal, + math_ops.floor_div, + math_ops.floor_mod, + math_ops.greater, + math_ops.greater_equal, + math_ops.igamma, + math_ops.igammac, + math_ops.igamma_grad_a, + math_ops.less, + math_ops.less_equal, + math_ops.maximum, + math_ops.minimum, + math_ops.mod, + math_ops.multiply, + math_ops.not_equal, + math_ops.pow, + math_ops.squared_difference, + math_ops.subtract, + math_ops.truncate_mod, + safe_polygamma, + safe_zeta, ] - for op in logical_ops + bool_ops + float_ops: + for op in logical_ops + float_ops: x = random_ops.random_uniform([7, 3, 5]) y = random_ops.random_uniform([3, 5]) if op in logical_ops: x = x > 0 y = y > 0 + output_dtypes = [] # pylint: disable=cell-var-from-loop def loop_fn(i): x1 = array_ops.gather(x, i) y1 = array_ops.gather(y, i) - return op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1) - + outputs = [op(x, y), op(x1, y), op(x, y1), op(x1, y1), op(x1, x1)] + del output_dtypes[:] + output_dtypes.extend([t.dtype for t in outputs]) + return outputs # pylint: enable=cell-var-from-loop - dtype = dtypes.float32 if op in float_ops else dtypes.bool - self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtype] * 5) + self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=output_dtypes) + + def test_approximate_equal(self): + x = random_ops.random_uniform([3, 5]) + y = random_ops.random_uniform([3, 5]) + + def loop_fn(i): + x1 = array_ops.gather(x, i) + y1 = array_ops.gather(y, i) + return math_ops.approximate_equal(x1, y1) + + self._test_loop_fn(loop_fn, 3, loop_fn_dtypes=[dtypes.bool]) def test_addn(self): x = 
random_ops.random_uniform([2, 3, 5]) diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py index f9153b6d7d..e0f6d51881 100644 --- a/tensorflow/python/ops/parallel_for/pfor.py +++ b/tensorflow/python/ops/parallel_for/pfor.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import bitwise_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops @@ -1922,37 +1923,114 @@ def _convert_cast(pfor_input): return wrap(math_ops.cast(inp, dtype), True) -# Note that ops handled here do not have attributes except "T", and hence don't -# need extra arguments passed to the cwise_op call below. +@RegisterPForWithArgs("Abs", math_ops.abs) +@RegisterPForWithArgs("Acosh", math_ops.acosh) +@RegisterPForWithArgs("Acos", math_ops.acos) @RegisterPForWithArgs("Add", math_ops.add) +@RegisterPForWithArgs("AddV2", math_ops.add_v2) +@RegisterPForWithArgs("Angle", math_ops.angle) +@RegisterPForWithArgs("Asinh", math_ops.asinh) +@RegisterPForWithArgs("Asin", math_ops.asin) +@RegisterPForWithArgs("Atan2", math_ops.atan2) +@RegisterPForWithArgs("Atanh", math_ops.atanh) +@RegisterPForWithArgs("Atan", math_ops.atan) +@RegisterPForWithArgs("BesselI0e", math_ops.bessel_i0e) +@RegisterPForWithArgs("BesselI1e", math_ops.bessel_i1e) +@RegisterPForWithArgs("BitwiseAnd", bitwise_ops.bitwise_and) +@RegisterPForWithArgs("BitwiseOr", bitwise_ops.bitwise_or) +@RegisterPForWithArgs("BitwiseXor", bitwise_ops.bitwise_xor) @RegisterPForWithArgs("Ceil", math_ops.ceil) +@RegisterPForWithArgs("ComplexAbs", math_ops.complex_abs) +@RegisterPForWithArgs("Complex", math_ops.complex) +@RegisterPForWithArgs("Conj", math_ops.conj) +@RegisterPForWithArgs("Cosh", math_ops.cosh) 
+@RegisterPForWithArgs("Cos", math_ops.cos) +@RegisterPForWithArgs("Digamma", math_ops.digamma) +@RegisterPForWithArgs("Div", math_ops.div) +@RegisterPForWithArgs("DivNoNan", math_ops.div_no_nan) +@RegisterPForWithArgs("Elu", nn_ops.elu) @RegisterPForWithArgs("Equal", math_ops.equal) -@RegisterPForWithArgs("NotEqual", math_ops.not_equal) +@RegisterPForWithArgs("Erfc", math_ops.erfc) +@RegisterPForWithArgs("Erf", math_ops.erf) +@RegisterPForWithArgs("Expm1", math_ops.expm1) +@RegisterPForWithArgs("Exp", math_ops.exp) +@RegisterPForWithArgs("FloorDiv", math_ops.floor_div) @RegisterPForWithArgs("Floor", math_ops.floor) -@RegisterPForWithArgs("Greater", math_ops.greater) +@RegisterPForWithArgs("FloorMod", math_ops.floor_mod) @RegisterPForWithArgs("GreaterEqual", math_ops.greater_equal) -@RegisterPForWithArgs("Less", math_ops.less) +@RegisterPForWithArgs("Greater", math_ops.greater) +@RegisterPForWithArgs("Igammac", math_ops.igammac) +@RegisterPForWithArgs("IgammaGradA", math_ops.igamma_grad_a) +@RegisterPForWithArgs("Igamma", math_ops.igamma) +@RegisterPForWithArgs("Imag", math_ops.imag) +@RegisterPForWithArgs("Invert", bitwise_ops.invert) +@RegisterPForWithArgs("Inv", math_ops.inv) +@RegisterPForWithArgs("IsFinite", math_ops.is_finite) +@RegisterPForWithArgs("IsInf", math_ops.is_inf) +@RegisterPForWithArgs("LeftShift", bitwise_ops.left_shift) @RegisterPForWithArgs("LessEqual", math_ops.less_equal) -@RegisterPForWithArgs("LogicalOr", math_ops.logical_or) +@RegisterPForWithArgs("Less", math_ops.less) +@RegisterPForWithArgs("Lgamma", math_ops.lgamma) +@RegisterPForWithArgs("Log1p", math_ops.log1p) @RegisterPForWithArgs("LogicalAnd", math_ops.logical_and) @RegisterPForWithArgs("LogicalNot", math_ops.logical_not) +@RegisterPForWithArgs("LogicalOr", math_ops.logical_or) @RegisterPForWithArgs("LogicalXor", math_ops.logical_xor) +@RegisterPForWithArgs("Log", math_ops.log) @RegisterPForWithArgs("Maximum", math_ops.maximum) @RegisterPForWithArgs("Minimum", math_ops.minimum) 
+@RegisterPForWithArgs("Mod", math_ops.mod) @RegisterPForWithArgs("Mul", math_ops.multiply) @RegisterPForWithArgs("Neg", math_ops.negative) +@RegisterPForWithArgs("NotEqual", math_ops.not_equal) +@RegisterPForWithArgs("Polygamma", math_ops.polygamma) +@RegisterPForWithArgs("Pow", math_ops.pow) @RegisterPForWithArgs("RealDiv", math_ops.divide) +@RegisterPForWithArgs("Real", math_ops.real) +@RegisterPForWithArgs("ReciprocalGrad", math_ops.reciprocal_grad) +@RegisterPForWithArgs("Reciprocal", math_ops.reciprocal) +@RegisterPForWithArgs("Relu6", nn_ops.relu6) @RegisterPForWithArgs("Relu", nn_ops.relu) +@RegisterPForWithArgs("RightShift", bitwise_ops.right_shift) +@RegisterPForWithArgs("Rint", math_ops.rint) +@RegisterPForWithArgs("Round", math_ops.round) +@RegisterPForWithArgs("RsqrtGrad", math_ops.rsqrt_grad) +@RegisterPForWithArgs("Rsqrt", math_ops.rsqrt) +@RegisterPForWithArgs("Selu", nn_ops.selu) @RegisterPForWithArgs("Sigmoid", math_ops.sigmoid) +@RegisterPForWithArgs("Sign", math_ops.sign) +@RegisterPForWithArgs("Sinh", math_ops.sinh) +@RegisterPForWithArgs("Sin", math_ops.sin) +@RegisterPForWithArgs("Softplus", nn_ops.softplus) +@RegisterPForWithArgs("Softsign", nn_ops.softsign) +@RegisterPForWithArgs("SqrtGrad", math_ops.sqrt_grad) +@RegisterPForWithArgs("Sqrt", math_ops.sqrt) +@RegisterPForWithArgs("SquaredDifference", math_ops.squared_difference) @RegisterPForWithArgs("Square", math_ops.square) @RegisterPForWithArgs("Sub", math_ops.subtract) @RegisterPForWithArgs("Tanh", math_ops.tanh) +@RegisterPForWithArgs("Tan", math_ops.tan) +@RegisterPForWithArgs("TruncateDiv", math_ops.truncate_div) +@RegisterPForWithArgs("TruncateMod", math_ops.truncate_mod) +@RegisterPForWithArgs("Zeta", math_ops.zeta) def _convert_cwise(pfor_input, op_type, op_func): - del op_type + # Note that ops handled here do not have attributes except "T" and "Tout", and + # hence don't need extra arguments passed to the cwise_op call below. 
+ for attr in pfor_input.op.node_def.attr.keys(): + assert attr in [u"T", u"Tout"], (op_type, attr) pfor_input.expanddim_inputs_for_broadcast() return wrap(op_func(*[x.t for x in pfor_input.inputs]), True) +@RegisterPFor("ApproximateEqual") +def _convert_approximate_equal(pfor_input): + pfor_input.expanddim_inputs_for_broadcast() + x = pfor_input.input(0)[0] + y = pfor_input.input(1)[0] + tolerance = pfor_input.get_attr("tolerance") + return wrap(math_ops.approximate_equal(x, y, tolerance=tolerance), True) + + @RegisterPFor("Shape") def _convert_shape(pfor_input): out_type = pfor_input.get_attr("out_type") @@ -2009,10 +2087,14 @@ def _convert_biasaddgrad(pfor_input): # Some required ops are not exposed under the tf namespace. Hence relying on # _create_op to create them. +@RegisterPForWithArgs("EluGrad") +@RegisterPForWithArgs("Relu6Grad") @RegisterPForWithArgs("ReluGrad") -@RegisterPForWithArgs("TanhGrad") +@RegisterPForWithArgs("SeluGrad") @RegisterPForWithArgs("SigmoidGrad") @RegisterPForWithArgs("SoftplusGrad") +@RegisterPForWithArgs("SoftsignGrad") +@RegisterPForWithArgs("TanhGrad") def _convert_grads(pfor_input, op_type, *args, **kw_args): del args del kw_args -- GitLab From 9445d19a140561017992f0bf1364c9dc4733b7ca Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 11:15:23 -0700 Subject: [PATCH 0207/1357] Convert more kernel signatures to use runtime shapes. 
PiperOrigin-RevId: 213012717 --- .../internal/reference/reference_ops.h | 140 ++++++++++++++---- .../contrib/lite/kernels/internal/types.h | 9 ++ 2 files changed, 119 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 111adbf5b3..2d552909a8 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -4487,34 +4487,70 @@ void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data, } template -void Transpose(const T* input, const Dims<4>& input_dims, T* output, - const Dims<4>& output_dims, const int* permuted_axes) { +void Transpose(const TransposeParams& params, + const RuntimeShape& unextended_input_shape, const T* input_data, + const RuntimeShape& unextended_output_shape, T* output_data) { + const int unextended_output_size = unextended_output_shape.DimensionsCount(); + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_size, 4); + TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + const int input_ext_size = 4 - unextended_input_shape.DimensionsCount(); + const int output_ext_size = 4 - unextended_output_size; + + // The perm data is extended to match the output, each index incremented by + // the amount of front padding of the input shape. + int extended_perm[4]; + for (int i = 0; i < output_ext_size; ++i) { + extended_perm[i] = i; + } + for (int i = 0; i < unextended_output_size; ++i) { + extended_perm[i + output_ext_size] = params.perm[i] + input_ext_size; + } + int out_sizes[4]; // Compute the inverse permutation array so we can do an output centered // transpose. 
Also, check to make sure output_dims is matching input_dims. for (int k = 0; k < 4; k++) { - out_sizes[k] = - MatchingArraySize(input_dims, permuted_axes[k], output_dims, k); + out_sizes[k] = MatchingDim(input_shape, extended_perm[k], output_shape, k); } // Naive transpose loop (iterate on output index and compute input index). int o[4]; // loop index (on output). int i[4]; for (o[3] = 0; o[3] < out_sizes[3]; o[3]++) { - i[permuted_axes[3]] = o[3]; + i[extended_perm[3]] = o[3]; for (o[2] = 0; o[2] < out_sizes[2]; o[2]++) { - i[permuted_axes[2]] = o[2]; + i[extended_perm[2]] = o[2]; for (o[1] = 0; o[1] < out_sizes[1]; o[1]++) { - i[permuted_axes[1]] = o[1]; + i[extended_perm[1]] = o[1]; for (o[0] = 0; o[0] < out_sizes[0]; o[0]++) { - i[permuted_axes[0]] = o[0]; - output[Offset(output_dims, o)] = input[Offset(input_dims, i)]; + i[extended_perm[0]] = o[0]; + output_data[Offset(output_shape, o)] = + input_data[Offset(input_shape, i)]; } } } } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+template +void Transpose(const T* input, const Dims<4>& input_dims, T* output, + const Dims<4>& output_dims, const int* permuted_axes) { + TransposeParams params; + params.perm_count = 4; + for (int i = 0; i < 4; ++i) { + params.perm[i] = 3 - permuted_axes[3 - i]; + } + Transpose(params, DimsToShape(input_dims), input, DimsToShape(output_dims), + output); +} + inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, int stride_width, int stride_height, int pad_width, @@ -4927,48 +4963,82 @@ TFLITE_COMPARISON_OP(Less); TFLITE_COMPARISON_OP(LessEqual); #undef TFLITE_COMPARISON_OP +template +void Select(const RuntimeShape& input_condition_shape, + const D* input_condition_data, const RuntimeShape& input_x_shape, + const T* input_x_data, const RuntimeShape& input_y_shape, + const T* input_y_data, const RuntimeShape& output_shape, + T* output_data) { + const int64_t flatsize = MatchingFlatSize( + input_condition_shape, input_x_shape, input_y_shape, output_shape); + for (int64_t i = 0; i < flatsize; ++i) { + output_data[i] = + input_condition_data[i] ? input_x_data[i] : input_y_data[i]; + } +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. template inline void Select(const D* input_condition_data, const Dims<4>& input_condition_dims, const T* input_x_data, const Dims<4>& input_x_dims, const T* input_y_data, const Dims<4>& input_y_dims, T* output_data, const Dims<4>& output_dims) { - const int64_t flatsize = - MatchingFlatSize(input_x_dims, input_y_dims, output_dims); - for (int64_t i = 0; i < flatsize; ++i) { - output_data[i] = - input_condition_data[i] ? 
input_x_data[i] : input_y_data[i]; - } + Select(DimsToShape(input_condition_dims), input_condition_data, + DimsToShape(input_x_dims), input_x_data, DimsToShape(input_y_dims), + input_y_data, DimsToShape(output_dims), output_data); } template -inline void RankOneSelect(const D* input_condition_data, - const Dims<4>& input_condition_dims, - const T* input_x_data, const Dims<4>& input_x_dims, - const T* input_y_data, const Dims<4>& input_y_dims, - T* output_data, const Dims<4>& output_dims) { - const int64_t rank = MatchingArraySize(input_condition_dims, 0, input_x_dims, - 3, input_y_dims, 3, output_dims, 3); +void RankOneSelect(const RuntimeShape& input_condition_shape, + const D* input_condition_data, + const RuntimeShape& input_x_shape, const T* input_x_data, + const RuntimeShape& input_y_shape, const T* input_y_data, + const RuntimeShape& output_shape, T* output_data) { + const int64_t outer_size = input_condition_shape.FlatSize(); + TFLITE_DCHECK_EQ( + MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), + outer_size); const int64_t inner_size = - MatchingFlatSizeSkipDim(input_x_dims, 3, input_y_dims, output_dims); + MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape); int64_t offset = 0; - for (int64_t i = 0; i < rank; i++) { + for (int64_t i = 0; i < outer_size; i++) { const T* input_data = input_condition_data[i] ? input_x_data : input_y_data; memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T)); offset += inner_size; } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+template +inline void RankOneSelect(const D* input_condition_data, + const Dims<4>& input_condition_dims, + const T* input_x_data, const Dims<4>& input_x_dims, + const T* input_y_data, const Dims<4>& input_y_dims, + T* output_data, const Dims<4>& output_dims) { + RankOneSelect(DimsToShape(input_condition_dims), input_condition_data, + DimsToShape(input_x_dims), input_x_data, + DimsToShape(input_y_dims), input_y_data, + DimsToShape(output_dims), output_data); +} + // For easy implementation, the indices is always a vector of size-4 vectors. template inline void SparseToDense(const std::vector>& indices, - const T* values, T default_value, T* output_data, - const Dims<4>& output_dims, bool value_is_scalar) { + const T* values, T default_value, + bool value_is_scalar, + const RuntimeShape& unextended_output_shape, + T* output_data) { + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); const int value_count = indices.size(); // First fill the output_data with default value. - const int num_elements = FlatSize(output_dims); + const int num_elements = output_shape.FlatSize(); for (int i = 0; i < num_elements; ++i) { output_data[i] = default_value; } @@ -4980,8 +5050,8 @@ inline void SparseToDense(const std::vector>& indices, const std::vector& index = indices[i]; TFLITE_DCHECK_EQ(index.size(), 4); const T value = *values; // just use the first value. 
- output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] = - value; + output_data[Offset(output_shape, index[0], index[1], index[2], + index[3])] = value; } return; } @@ -4991,11 +5061,21 @@ inline void SparseToDense(const std::vector>& indices, const std::vector& index = indices[i]; TFLITE_DCHECK_EQ(index.size(), 4); const T value = values[i]; - output_data[Offset(output_dims, index[3], index[2], index[1], index[0])] = + output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value; } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +template +inline void SparseToDense(const std::vector>& indices, + const T* values, T default_value, T* output_data, + const Dims<4>& output_dims, bool value_is_scalar) { + SparseToDense(indices, values, default_value, value_is_scalar, + DimsToShape(output_dims), output_data); +} + template inline void Pow(const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index 87e8ff0346..fe84c1caca 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -361,6 +361,10 @@ inline int Offset(const Dims<4>& dims, int* index) { return Offset(dims, index[0], index[1], index[2], index[3]); } +inline int Offset(const RuntimeShape& shape, int* index) { + return Offset(shape, index[0], index[1], index[2], index[3]); +} + // Get array size, DCHECKing that the dim index is in range. 
// // Note that this will be phased out with Dims<4>, since RuntimeShape::Dims() @@ -936,6 +940,11 @@ struct TanhParams { int input_left_shift; }; +struct TransposeParams { + int8 perm_count; + int32 perm[4]; +}; + template inline void SetActivationParams(float min, float max, P* params) { params->float_activation_min = min; -- GitLab From 89f9080ed0d1a43cb2fa253997b2553c6916f364 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 14 Sep 2018 11:23:46 -0700 Subject: [PATCH 0208/1357] [XLA] Support strength reducing bfloat16 dot products There is no reason to limit ourselves to float32 dot product operations, we simply convert to and from float32 around the reduction to simulate the precision change. PiperOrigin-RevId: 213014410 --- .../xla/service/algebraic_simplifier.cc | 53 ++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 5458159d14..4ef1dffa73 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -745,12 +745,24 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( } const int64 rhs_kept_dim = 1 - rhs_collapsing_dim; - auto reshape_if_necessary = [&](HloInstruction* hlo) { - if (ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) { + auto as_type = [&](HloInstruction* hlo, const PrimitiveType element_type) { + if (hlo->shape().element_type() == element_type) { return hlo; } - return computation_->AddInstruction( - HloInstruction::CreateReshape(dot->shape(), hlo)); + return computation_->AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(hlo->shape(), element_type), hlo)); + }; + + auto reshape_if_necessary = [&](HloInstruction* hlo) { + if (!ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) { + hlo = computation_->AddInstruction( + HloInstruction::CreateReshape(dot->shape(), 
hlo)); + } + return as_type(hlo, dot->shape().element_type()); + }; + + auto add_reduce_in_f32 = [&](HloInstruction* hlo, const int64 dim) { + return AddReduce(as_type(hlo, F32), dim); }; auto broadcast_to_dim = [&](HloInstruction* hlo, const Shape& shape, @@ -770,7 +782,7 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( if (ShapeUtil::Rank(rhs->shape()) == 1 && ShapeUtil::Rank(lhs->shape()) == 1) { TF_RETURN_IF_ERROR( - ReplaceInstruction(dot, reshape_if_necessary(AddReduce( + ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32( multiply(Flatten(lhs), Flatten(rhs)), 0)))); return true; } @@ -804,17 +816,17 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( (ShapeUtil::Rank(lhs->shape()) == 2 && lhs->shape().dimensions(lhs_kept_dim) == 1)) { if (ShapeUtil::Rank(rhs->shape()) == 1) { - TF_RETURN_IF_ERROR(ReplaceInstruction( - dot, - reshape_if_necessary(AddReduce(multiply(Flatten(lhs), rhs), 0)))); + TF_RETURN_IF_ERROR( + ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32( + multiply(Flatten(lhs), rhs), 0)))); return true; } TF_RETURN_IF_ERROR(ReplaceInstruction( - dot, reshape_if_necessary( - AddReduce(multiply(broadcast_to_dim(Flatten(lhs), rhs->shape(), - rhs_collapsing_dim), - rhs), - rhs_collapsing_dim)))); + dot, reshape_if_necessary(add_reduce_in_f32( + multiply(broadcast_to_dim(Flatten(lhs), rhs->shape(), + rhs_collapsing_dim), + rhs), + rhs_collapsing_dim)))); return true; } @@ -826,7 +838,7 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( (ShapeUtil::Rank(rhs->shape()) == 2 && rhs->shape().dimensions(rhs_kept_dim) == 1)) { TF_RETURN_IF_ERROR(ReplaceInstruction( - dot, reshape_if_necessary(AddReduce( + dot, reshape_if_necessary(add_reduce_in_f32( multiply(lhs, broadcast_to_dim(Flatten(rhs), lhs->shape(), lhs_collapsing_dim)), lhs_collapsing_dim)))); @@ -1061,7 +1073,8 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfGather( const int m = left_operand->shape().dimensions(1 - 
lhs_contracting_dimension); const int n = right_operand->shape().dimensions(1 - rhs_contracting_dimension); - auto memoized_shape = ShapeUtil::MakeShape(F32, {m, n}); + auto memoized_shape = + ShapeUtil::MakeShape(dot->shape().element_type(), {m, n}); auto* memoized_inst = computation_->AddInstruction( HloInstruction::CreateDot(memoized_shape, left_operand, right_operand, dnums, dot->precision_config())); @@ -1109,10 +1122,12 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { HloInstruction *lhs, *rhs; CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs)))); - // Only optimize F32 dot operations where the dot, rhs and lhs are rank 2 or - // below. - if (dot->shape().element_type() != F32 || ShapeUtil::Rank(lhs->shape()) > 2 || - ShapeUtil::Rank(rhs->shape()) > 2 || ShapeUtil::Rank(dot->shape()) > 2) { + // Only optimize F32 or BF16 dot operations where the dot, rhs and lhs are + // rank 2 or below. + if ((dot->shape().element_type() != F32 && + dot->shape().element_type() != BF16) || + ShapeUtil::Rank(lhs->shape()) > 2 || ShapeUtil::Rank(rhs->shape()) > 2 || + ShapeUtil::Rank(dot->shape()) > 2) { return Status::OK(); } -- GitLab From c20a7b81d79d30db9e990309ddb419bcb48120cc Mon Sep 17 00:00:00 2001 From: Piotr Padlewski Date: Fri, 14 Sep 2018 11:28:28 -0700 Subject: [PATCH 0209/1357] [tf.data] Introducing an optimization that parallelizes map transformations. Stateless MapDatasets can be parallelized by switching to ParallelMapDataset. We set `num_parallel_calls` to 2 for now, but in the future a special value will be used that results in the optimal value to be selected dynamically at runtime. This patch also exposed a memory leak which was fixed. 
PiperOrigin-RevId: 213015223 --- .../python/kernel_tests/optimization/BUILD | 17 +++ .../optimization/map_parallelization_test.py | 84 ++++++++++++++ tensorflow/core/BUILD | 2 + .../core/common_runtime/direct_session.cc | 9 +- tensorflow/core/common_runtime/function.cc | 5 +- .../core/distributed_runtime/graph_mgr.cc | 8 +- tensorflow/core/framework/function.cc | 13 +++ tensorflow/core/framework/function_testlib.cc | 34 ++++++ tensorflow/core/framework/function_testlib.h | 3 + tensorflow/core/framework/op_kernel.cc | 11 +- tensorflow/core/framework/op_segment.cc | 8 ++ tensorflow/core/framework/op_segment.h | 4 + .../core/grappler/optimizers/data/BUILD | 44 +++++++- .../optimizers/data/map_parallelization.cc | 106 ++++++++++++++++++ .../optimizers/data/map_parallelization.h | 47 ++++++++ .../data/map_parallelization_test.cc | 94 ++++++++++++++++ 16 files changed, 461 insertions(+), 28 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py create mode 100644 tensorflow/core/grappler/optimizers/data/map_parallelization.cc create mode 100644 tensorflow/core/grappler/optimizers/data/map_parallelization.h create mode 100644 tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD index 7e9ea68047..b3187bf61b 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD @@ -73,6 +73,23 @@ py_test( ], ) +py_test( + name = "map_parallelization_test", + size = "small", + srcs = ["map_parallelization_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:optimization", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + 
"//tensorflow/python/data/ops:dataset_ops", + "@absl_py//absl/testing:parameterized", + ], +) + py_test( name = "model_dataset_op_test", size = "medium", diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py new file mode 100644 index 0000000000..dd547db086 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_parallelization_test.py @@ -0,0 +1,84 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the MapParallelization optimization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.data.python.ops import optimization +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import test + + +class MapParallelizationTest(test.TestCase, parameterized.TestCase): + + @staticmethod + def map_functions(): + identity = lambda x: x + increment = lambda x: x + 1 + + def assert_greater(x): + assert_op = control_flow_ops.Assert(math_ops.greater(x, -1), [x]) + with ops.control_dependencies([assert_op]): + return x + + def random(_): + return random_ops.random_uniform([], + minval=0, + maxval=10, + dtype=dtypes.int64, + seed=42) + + def assert_with_random(x): + x = assert_greater(x) + return random(x) + + return (("Identity", identity, True), ("Increment", increment, True), + ("AssertGreater", assert_greater, True), ("Random", random, False), + ("AssertWithRandom", assert_with_random, False)) + + @parameterized.named_parameters(*map_functions.__func__()) + def testMapParallelization(self, function, should_optimize): + next_nodes = ["ParallelMap"] if should_optimize else ["Map"] + dataset = dataset_ops.Dataset.range(5).apply( + optimization.assert_next(next_nodes)).map(function).apply( + optimization.optimize(["map_parallelization"])) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + + with self.test_session() as sess: + for x in range(5): + result = sess.run(get_next) + # No need to run the pipeline if it was 
not optimized. Also the results + # might be hard to check because of random. + if not should_optimize: + return + r = function(x) + self.assertAllEqual(r, result) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1a86bff5cd..55715bb3a6 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1429,9 +1429,11 @@ cc_library( ":test", ":testlib_ops", "//tensorflow/cc:scope", + "//tensorflow/core/kernels:cast_op", "//tensorflow/core/kernels:constant_op", "//tensorflow/core/kernels:ops_testutil", "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/kernels:random_ops", ], ) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index b4d8e285bd..af5d5b17e7 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1202,14 +1202,11 @@ Status DirectSession::CreateExecutors( auto opseg = device->op_segment(); params.create_kernel = [this, lib, opseg](const NodeDef& ndef, OpKernel** kernel) { - // We do not share the kernel via the OpSegment if the node is - // stateless, or a function. // NOTE(mrry): We must not share function kernels (implemented // using `CallOp`) between subgraphs, because `CallOp::handle_` // is tied to a particular subgraph. Even if the function itself // is stateful, the `CallOp` that invokes it is not. - if (!lib->IsStateful(ndef.op()) || - lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) { + if (!OpSegment::ShouldOwnKernel(lib, ndef.op())) { return lib->CreateKernel(ndef, kernel); } auto create_fn = [lib, &ndef](OpKernel** kernel) { @@ -1222,10 +1219,8 @@ Status DirectSession::CreateExecutors( create_fn); }; params.delete_kernel = [lib](OpKernel* kernel) { - // If the node is stateful, opseg owns it. Otherwise, delete it. 
- if (kernel && !lib->IsStateful(kernel->type_string())) { + if (kernel && !OpSegment::ShouldOwnKernel(lib, kernel->type_string())) delete kernel; - } }; optimizer.Optimize(lib, options_.env, device, &partition_graph, diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 1c9b69721d..472865ca43 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -414,9 +414,8 @@ Status FunctionLibraryRuntimeImpl::CreateKernel( device_type, device_, device_->GetAllocator(AllocatorAttributes()), &ndef, &fbody->fdef.signature(), this, fbody->arg_types, input_memory_types, fbody->ret_types, output_memory_types, graph_def_version_, &s); - *kernel = new CallOp(handle, &construction); - if (!s.ok()) { - delete *kernel; + if (s.ok()) { + *kernel = new CallOp(handle, &construction); } return s; } diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 6c146036ae..f7a2967d00 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -233,14 +233,11 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef, params.function_library = lib; params.create_kernel = [session, lib, opseg](const NodeDef& ndef, OpKernel** kernel) { - // We do not share the kernel via the OpSegment if the node is - // stateless, or a function. // NOTE(mrry): We must not share function kernels (implemented // using `CallOp`) between subgraphs, because `CallOp::handle_` // is tied to a particular subgraph. Even if the function itself // is stateful, the `CallOp` that invokes it is not. 
- if (!lib->IsStateful(ndef.op()) || - lib->GetFunctionLibraryDefinition()->Find(ndef.op()) != nullptr) { + if (!OpSegment::ShouldOwnKernel(lib, ndef.op())) { return lib->CreateKernel(ndef, kernel); } auto create_fn = [lib, &ndef](OpKernel** kernel) { @@ -252,8 +249,7 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef, return opseg->FindOrCreate(session, ndef.name(), kernel, create_fn); }; params.delete_kernel = [lib](OpKernel* kernel) { - // If the node is stateful, opseg owns it. Otherwise, delete it. - if (kernel && !lib->IsStateful(kernel->type_string())) { + if (kernel && !OpSegment::ShouldOwnKernel(lib, kernel->type_string())) { delete kernel; } }; diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc index d979353d2f..a17959a448 100644 --- a/tensorflow/core/framework/function.cc +++ b/tensorflow/core/framework/function.cc @@ -1294,6 +1294,18 @@ FunctionDef FunctionDefHelper::Create( for (const auto& r : ret_def) { fdef.mutable_ret()->insert({r.first, r.second}); } + + auto* op_def_registry = OpRegistry::Global(); + // Check if any op is stateful. + for (const auto& n : node_def) { + const OpDef* op_def = nullptr; + auto status = op_def_registry->LookUpOpDef(n.op, &op_def); + // Lookup can fail if e.g. we are calling a function that was not yet + // defined. If it happens, conservatively assume the op is stateful. 
+ if (!status.ok() || op_def->is_stateful()) { + fdef.mutable_signature()->set_is_stateful(true); + } + } return fdef; } @@ -1355,6 +1367,7 @@ FunctionDef FunctionDefHelper::Define(const string& name, strings::StrCat(src.ret[0], ":", o.first, ":", i - o.second.first); } } + if (op_def->is_stateful()) fdef.mutable_signature()->set_is_stateful(true); } // Returns diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc index c5a4f661d2..d5c203d276 100644 --- a/tensorflow/core/framework/function_testlib.cc +++ b/tensorflow/core/framework/function_testlib.cc @@ -91,6 +91,40 @@ FunctionDef IsZero() { }); } +FunctionDef RandomUniform() { + const Tensor kZero = test::AsScalar(0); + const Tensor kTen = test::AsScalar(10); + + return FDH::Define( + // Name + "RandomUniform", + // Args + {"x: T"}, + // Return values + {"random_uniform: int64"}, + // Attr def + {"T:{float, double, int32, int64, string}"}, + {{{"random_uniform/shape"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT64}}}, + {{"random_uniform/min"}, + "Const", + {}, + {{"value", kZero}, {"dtype", DT_INT64}}}, + {{"random_uniform/max"}, + "Const", + {}, + {{"value", kTen}, {"dtype", DT_INT64}}}, + {{"random_uniform"}, + "RandomUniformInt", + {}, + {{"T", DT_INT64}, + {"Tout", DT_INT64}, + {"seed", 87654321}, + {"seed2", 42}}}}); +} + FunctionDef XTimesTwo() { const Tensor kTwo = test::AsScalar(2); return FDH::Define( diff --git a/tensorflow/core/framework/function_testlib.h b/tensorflow/core/framework/function_testlib.h index ad61a76f16..a01743423b 100644 --- a/tensorflow/core/framework/function_testlib.h +++ b/tensorflow/core/framework/function_testlib.h @@ -84,6 +84,9 @@ FunctionDef NonZero(); // x: T -> bool. 
FunctionDef IsZero(); +// x: T -> int64 +FunctionDef RandomUniform(); + // x:T, y:T -> y:T, x:T FunctionDef Swap(); diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index c694e10193..80f2b12987 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -41,6 +41,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { @@ -80,10 +81,8 @@ Status MatchSignatureHelper(const DataTypeSlice expected_inputs, // OpKernel ------------------------------------------------------------------ -// TODO(mrry): Convert to std::make_unique when available. OpKernel::OpKernel(OpKernelConstruction* context) - : OpKernel(context, - std::unique_ptr(new NodeDef(context->def()))) {} + : OpKernel(context, MakeUnique(context->def())) {} OpKernel::OpKernel(OpKernelConstruction* context, std::unique_ptr node_def) @@ -525,10 +524,8 @@ std::unique_ptr OpKernelContext::forward_input( return nullptr; } } - // TODO(rmlarsen): Use MakeUnique here. There is already a copy in - // tensorflow/compiler/xla/ptr_util.h. Perhaps this should be part of - // general cleanup of ownership in this code. - std::unique_ptr output_tensor(new Tensor()); + + auto output_tensor = MakeUnique(); CHECK(output_tensor->CopyFrom(*input.tensor, output_shape)); return output_tensor; } diff --git a/tensorflow/core/framework/op_segment.cc b/tensorflow/core/framework/op_segment.cc index dfc5aa7747..75ed4a4eaf 100644 --- a/tensorflow/core/framework/op_segment.cc +++ b/tensorflow/core/framework/op_segment.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_segment.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -99,4 +100,11 @@ void OpSegment::RemoveHold(const string& session_handle) { delete item; } +bool OpSegment::ShouldOwnKernel(FunctionLibraryRuntime* lib, + const string& node_op) { + // OpSegment should not own kernel if the node is stateless, or a function. + return lib->IsStateful(node_op) && + lib->GetFunctionLibraryDefinition()->Find(node_op) == nullptr; +} + } // end namespace tensorflow diff --git a/tensorflow/core/framework/op_segment.h b/tensorflow/core/framework/op_segment.h index 4433a2554f..37d939ea2b 100644 --- a/tensorflow/core/framework/op_segment.h +++ b/tensorflow/core/framework/op_segment.h @@ -60,6 +60,10 @@ class OpSegment { Status FindOrCreate(const string& session_handle, const string& node_name, OpKernel** kernel, CreateKernelFn create_fn); + // Returns true if OpSegment should own the kernel. 
+ static bool ShouldOwnKernel(FunctionLibraryRuntime* lib, + const string& node_op); + private: // op name -> OpKernel typedef std::unordered_map KernelMap; diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 530c957068..e84df10778 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -19,7 +19,6 @@ cc_library( "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", - "//tensorflow/core/kernels:cast_op", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", @@ -56,8 +55,8 @@ cc_library( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", - "//tensorflow/core/kernels:cast_op", "//tensorflow/core/kernels:functional_ops", + "//tensorflow/core/kernels:control_flow_ops", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", "//tensorflow/core:lib_internal", ] + tf_protos_all(), @@ -107,7 +106,6 @@ tf_cc_test( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", - "//tensorflow/core/kernels:cast_op", ], ) @@ -164,7 +162,6 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", - "//tensorflow/core/kernels:cast_op", # Must be linked for the testlib functions to work. 
], ) @@ -256,7 +253,6 @@ cc_library( "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", - "//tensorflow/core/kernels:cast_op", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", @@ -275,6 +271,43 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/kernels:control_flow_ops", + ], +) + +cc_library( + name = "map_parallelization", + srcs = ["map_parallelization.cc"], + hdrs = [ + "map_parallelization.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + "//tensorflow/core/grappler:mutable_graph_view", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/utils:topological_sort", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + ] + tf_protos_all(), +) + +tf_cc_test( + name = "map_parallelization_test", + srcs = ["map_parallelization_test.cc"], + visibility = ["//visibility:public"], + deps = [ + ":graph_utils", + ":map_parallelization", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", ], ) @@ -355,6 +388,7 @@ cc_library( ":map_and_batch_fusion", ":map_and_filter_fusion", ":map_fusion", + ":map_parallelization", ":map_vectorization", ":noop_elimination", ":shuffle_and_repeat_fusion", diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization.cc b/tensorflow/core/grappler/optimizers/data/map_parallelization.cc new file mode 100644 index 
0000000000..305325e434 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_parallelization.cc @@ -0,0 +1,106 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/map_parallelization.h" + +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/mutable_graph_view.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/grappler/utils.h" + +namespace tensorflow { +namespace grappler { +namespace { + +bool CanParallelize(const FunctionDef& function, + const FunctionLibraryDefinition& library) { + if (!function.signature().is_stateful()) return true; + + for (const auto& node : function.node_def()) { + const OpDef* op_def; + TF_CHECK_OK(library.LookUpOpDef(node.op(), &op_def)); + // Assert is marked as stateful, but it does not have any state (except + // changing io). Similarly to CUDA, we do not give guarantee that the + // assert operation that would fail would be the first one, so that we can + // parallelize it. 
+ if (op_def->is_stateful() && op_def->name() != "Assert") return false; + } + + return true; +} + +NodeDef MakeParallelMap(const NodeDef& map_node, MutableGraphView* graph) { + NodeDef parallel_map = map_node; + graph_utils::SetUniqueGraphNodeName("parallel_map", graph->GetGraph(), + &parallel_map); + parallel_map.set_op("ParallelMapDataset"); + // TODO(b/114475558): We want to set `num_parallel_calls` to a special value, + // so that dynamic tuning will pick the optimal value at runtime. Because + // this feature is not yet implemented, we set it to 2, which is the smallest + // value that introduces parallelism. + auto* num_parallel_calls = graph_utils::AddScalarConstNode(2, graph); + parallel_map.add_input(num_parallel_calls->name()); + + return parallel_map; +} + +} // namespace + +Status MapParallelization::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) { + *output = item.graph; + MutableGraphView graph(output); + std::set<string> nodes_to_delete; + FunctionLibraryDefinition function_library(OpRegistry::Global(), + item.graph.library()); + auto get_map_node = [](const NodeDef& node) -> const NodeDef* { + if (node.op() == "MapDataset") return &node; + return nullptr; + }; + + for (const NodeDef& node : item.graph.node()) { + const NodeDef* map_node = get_map_node(node); + if (!map_node) continue; + + auto* function = + function_library.Find(map_node->attr().at("f").func().name()); + if (!CanParallelize(*function, function_library)) continue; + + auto* parallel_map = graph.AddNode(MakeParallelMap(*map_node, &graph)); + graph.ReplaceInput(*map_node, *parallel_map); + + // TODO(prazek): we could also remove map functions from library if they + // are not used anymore. 
+ nodes_to_delete.insert(map_node->name()); + } + + graph.DeleteNodes(nodes_to_delete); + return Status::OK(); +} + +void MapParallelization::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, + double result) { + // no-op +} + +REGISTER_GRAPH_OPTIMIZER_AS(MapParallelization, "map_parallelization"); + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization.h b/tensorflow/core/grappler/optimizers/data/map_parallelization.h new file mode 100644 index 0000000000..ac9cf7e12a --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_parallelization.h @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_PARALLELIZATION_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_PARALLELIZATION_H_ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +// This optimization parallelizes MapDataset when function is stateless. 
+class MapParallelization : public CustomGraphOptimizer { + public: + MapParallelization() = default; + ~MapParallelization() override = default; + + string name() const override { return "map_parallelization"; }; + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* output) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_MAP_PARALLELIZATION_H_ diff --git a/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc b/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc new file mode 100644 index 0000000000..b2a5d9b6af --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc @@ -0,0 +1,94 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/map_parallelization.h" + +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +NodeDef MakeMapNode(StringPiece name, StringPiece input_node_name, + StringPiece function_name) { + return test::function::NDef( + name, "MapDataset", {string(input_node_name)}, + {{"f", FunctionDefHelper::FunctionRef(string(function_name))}, + {"Targuments", {}}, + {"output_shapes", {}}, + {"output_types", {}}}); +} + +const char stateless_fun_name[] = "XTimesTwo"; +const char stateful_fun_name[] = "RandomUniform"; + +TEST(MapParallelizationTest, ParallelizeSimpleMap) { + using test::function::NDef; + GrapplerItem item; + item.graph = test::function::GDef( + {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}), + NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}), + NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), + NDef("range", "RangeDataset", {"start", "stop", "step"}, {}), + MakeMapNode("map1", "range", stateless_fun_name)}, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + MapParallelization optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map1", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map2", output)); +} + +TEST(MapParallelization, ParallelizeAssert) { + using test::function::NDef; + GrapplerItem item; + 
item.graph = test::function::GDef( + {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}), + NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}), + NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), + NDef("filename", "Const", {}, {{"value", ""}, {"dtype", DT_STRING}}), + NDef("range", "RangeDataset", {"start", "stop", "step"}, {}), + MakeMapNode("map1", "range", stateful_fun_name), + MakeMapNode("map2", "map1", stateless_fun_name), + NDef("cache", "CacheDataset", {"map2", "filename"}, {})}, + // FunctionLib + { + test::function::XTimesTwo(), + test::function::RandomUniform(), + }); + + MapParallelization optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("map1", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map2", output)); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow -- GitLab From 39f50af5634b8a4d2132b57bad2152308a0fd41c Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Fri, 14 Sep 2018 11:42:02 -0700 Subject: [PATCH 0210/1357] Improve output parsing for unsupported ops PiperOrigin-RevId: 213017532 --- .../contrib/lite/toco/import_tensorflow.cc | 82 ++++++++++++------- .../lite/toco/import_tensorflow_test.cc | 52 ++++++++++++ 2 files changed, 104 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index efc1007925..2ccfd36b7c 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -69,6 +69,13 @@ bool HasAttr(const NodeDef& node, const string& attr_name) { return node.attr().count(attr_name) > 0; } +bool HasWildcardDimension(const TensorShapeProto& shape) { + for (const auto& dim : shape.dim()) { + if (dim.size() == -1) return true; + } + return false; +} + 
const string& GetStringAttr(const NodeDef& node, const string& attr_name) { CHECK(HasAttr(node, attr_name)); const auto& attr = node.attr().at(attr_name); @@ -1054,15 +1061,27 @@ tensorflow::Status ConvertUnsupportedOperator( "_support_output_type_float_in_quantized_op"; LOG(INFO) << "Converting unsupported operation: " << node.op(); + auto* op = new TensorFlowUnsupportedOperator; + op->tensorflow_op = node.op(); + node.SerializeToString(&op->tensorflow_node_def); + model->operators.emplace_back(op); + + // Parse inputs. const int num_inputs = GetInputsCount(node, tf_import_flags); for (int i = 0; i < num_inputs; ++i) { op->inputs.push_back(node.input(i)); } - op->outputs.push_back(node.name()); - op->tensorflow_op = node.op(); - node.SerializeToString(&op->tensorflow_node_def); - model->operators.emplace_back(op); + + // Parse outputs. + op->outputs.push_back(node.name()); // Implicit :0. + const tensorflow::OpDef* op_def = nullptr; + if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) { + for (int i = 1; i < op_def->output_arg_size(); ++i) { + op->outputs.push_back(absl::StrCat(node.name(), ":", i)); + } + } + // Parse if the op supports quantization if (HasAttr(node, kAttrOutputQuantized)) { op->quantized = GetBoolAttr(node, kAttrOutputQuantized); @@ -1072,6 +1091,8 @@ tensorflow::Status ConvertUnsupportedOperator( op->support_output_type_float_in_quantized_op = GetBoolAttr(node, kAttrSupportOutputTypeFloatInQuantizedOp); } + + // Parse output type(s). 
if (HasAttr(node, kAttrOutputTypes)) { const auto& output_types = GetListAttr(node, kAttrOutputTypes); for (int i = 0; i < output_types.type_size(); ++i) { @@ -1080,33 +1101,40 @@ tensorflow::Status ConvertUnsupportedOperator( } else if (HasAttr(node, "Tout")) { const auto& output_type = GetDataTypeAttr(node, "Tout"); op->output_data_types.push_back(ConvertDataType(output_type)); - } else { - const tensorflow::OpDef* op_def = nullptr; - if (OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) { - for (const auto& output_arg : op_def->output_arg()) { - if (HasAttr(node, output_arg.type_attr())) { - op->output_data_types.push_back( - ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr()))); - } else { - LOG(INFO) << "Op node missing output type attribute: " << node.name(); - op->output_data_types.clear(); - break; - } + } else if (op_def != nullptr) { + for (const auto& output_arg : op_def->output_arg()) { + if (HasAttr(node, output_arg.type_attr())) { + op->output_data_types.push_back( + ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr()))); + } else { + LOG(INFO) << "Op node missing output type attribute: " << node.name(); + op->output_data_types.clear(); + break; } } - if (op->output_data_types.empty()) { - // TODO(b/113613439): Figure out how to propagate types for custom ops - // that have no OpDef. - LOG(INFO) << "Unable to determine output type for op: " << node.op(); - } + } else { + // TODO(b/113613439): Figure out how to propagate types for custom ops + // that have no OpDef. + LOG(INFO) << "Unable to determine output type for op: " << node.op(); } + + // Parse output shape(s). if (HasAttr(node, kAttrOutputShapes)) { const auto& output_shapes = GetListAttr(node, kAttrOutputShapes); Shape output_shape; for (int i = 0; i < output_shapes.shape_size(); ++i) { + const auto& shape = output_shapes.shape(i); + // TOCO doesn't yet properly handle shapes with wildcard dimensions. 
+ // TODO(b/113613439): Handle shape inference for unsupported ops that have + // shapes with wildcard dimensions. + if (HasWildcardDimension(shape)) { + LOG(INFO) << "Skipping wildcard output shape(s) for node: " + << node.name(); + op->output_shapes.clear(); + break; + } const auto status = - ImportShape(output_shapes.shape(i).dim(), /*input_flat_size=*/nullptr, - &output_shape); + ImportShape(shape.dim(), /*input_flat_size=*/nullptr, &output_shape); if (!status.ok()) { return status; } @@ -1159,15 +1187,9 @@ tensorflow::Status ConvertPlaceholderOperator( if (node.attr().count("shape")) { const auto& shape = GetShapeAttr(node, "shape"); auto num_dims = shape.dim_size(); - bool has_wildcard = false; - for (std::size_t i = 0; i < num_dims; i++) { - if (shape.dim(i).size() == -1) { - has_wildcard = true; - } - } // TODO(b/62716978): This logic needs to be revisted. During dims // refactoring it is an interim fix. - if (num_dims > 0 && !has_wildcard) { + if (num_dims > 0 && !HasWildcardDimension(shape)) { auto& dst_array_dims = *array.mutable_shape()->mutable_dims(); dst_array_dims.resize(num_dims); for (std::size_t i = 0; i < num_dims; i++) { diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc index da248826a7..8a236d4444 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc @@ -60,6 +60,28 @@ Status ImportNode(const NodeDef& node) { return ImportNode(node, &model); } +NodeDef BuildNode( + const std::string& op, + const std::vector>& output_shapes) { + NodeDef node; + node.set_op(op); + node.set_name("Node1"); + node.add_input(); + node.set_input(0, "Node0"); + + AttrValue::ListValue* shapes = + (*node.mutable_attr())["_output_shapes"].mutable_list(); + for (const auto& output_shape : output_shapes) { + tensorflow::TensorShapeProto* shape = shapes->add_shape(); + for (int64_t output_shape_dim : output_shape) { + auto 
shape_dim = shape->add_dim(); + shape_dim->set_size(output_shape_dim); + } + } + + return node; +} + class ShapeImportTest : public ::testing::TestWithParam { protected: ShapeImportTest() {} @@ -232,5 +254,35 @@ TEST(ImportTest, FailedTypeInference) { ASSERT_TRUE(op->output_data_types.empty()); } +TEST(ImportTest, UnsupportedOpWithOutputShapes) { + // Create an unsupported op with output shapes. + Model model; + EXPECT_TRUE(ImportNode(BuildNode("Atan", {{1, 2}, {2, 3}}), &model).ok()); + ASSERT_THAT(model.operators.size(), ::testing::Ge(1)); + ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported); + const TensorFlowUnsupportedOperator* op = + static_cast( + model.operators[0].get()); + + // The output shapes should be imported. + ASSERT_EQ(op->output_shapes.size(), 2); + ASSERT_THAT(op->output_shapes[0].dims(), ::testing::ElementsAre(1, 2)); + ASSERT_THAT(op->output_shapes[1].dims(), ::testing::ElementsAre(2, 3)); +} + +TEST(ImportTest, UnsupportedOpWithWildcardOutputShapes) { + // Create an unsupported op with wildcard output shapes. + Model model; + EXPECT_TRUE(ImportNode(BuildNode("Atan", {{-1, 2}}), &model).ok()); + ASSERT_THAT(model.operators.size(), ::testing::Ge(1)); + ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported); + const TensorFlowUnsupportedOperator* op = + static_cast( + model.operators[0].get()); + + // Wildcard shapes aren't yet supported. 
+ ASSERT_TRUE(op->output_shapes.empty()); +} + } // namespace } // namespace toco -- GitLab From ba30af2c475ebd62ad7d75f056dba4f9d09030a8 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 14 Sep 2018 12:11:33 -0700 Subject: [PATCH 0211/1357] [TF:XLA] Bump open source llvm revision to r342210 PiperOrigin-RevId: 213022233 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 25698da1c9..4ca083c8a3 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -491,11 +491,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/738b5f5028ef39cbb023967f80fa2e5dd568556b.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/738b5f5028ef39cbb023967f80fa2e5dd568556b.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/ad72545325c087661feb3512efa54ebe5f888736.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/ad72545325c087661feb3512efa54ebe5f888736.tar.gz", ], - sha256 = "2bda8dd724ab432c162fb6eace259ccf8a97f13cb627336611bff68da2f33ec2", - strip_prefix = "llvm-738b5f5028ef39cbb023967f80fa2e5dd568556b", + sha256 = "66ed69443af00fbf9b912edbb6bc0fa796a12766b5e9ad504eb6b20f813dc163", + strip_prefix = "llvm-ad72545325c087661feb3512efa54ebe5f888736", build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"), ) -- GitLab From 8c2159a10e53e5301ae26c739a3d09fa53d3352e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 12:19:16 -0700 Subject: [PATCH 0212/1357] Updates to parameters, and to kernel helper functions. 
PiperOrigin-RevId: 213023245 --- .../lite/kernels/internal/optimized/optimized_ops.h | 5 ----- .../lite/kernels/internal/reference/reference_ops.h | 5 ----- tensorflow/contrib/lite/kernels/internal/types.h | 9 ++++++++- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 464207d739..8962d830a3 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -90,11 +90,6 @@ using reference_ops::Transpose; // Used mainly to convert from old-style shifts (right) to new-style (left). static constexpr int kReverseShift = -1; -inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) { - return RuntimeShape( - {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); -} - // Make a local VectorMap typedef allowing to map a float array // as a Eigen vector expression. The std::conditional here is to // construct the suitable Eigen type for the constness of the diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 2d552909a8..77927af227 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -105,11 +105,6 @@ namespace reference_ops { // Used mainly to convert from old-style shifts (right) to new-style (left). 
static constexpr int kReverseShift = -1; -inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) { - return RuntimeShape( - {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); -} - inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) { shape->BuildFrom( {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index fe84c1caca..f6636acc58 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -283,6 +283,12 @@ inline tflite::Dims<4> ToRuntimeDims(const tflite::RuntimeShape& array_shape) { return result; } +// TODO(b/80418076): Move to legacy ops file, update invocations. +inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) { + return RuntimeShape( + {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); +} + // Gets next index to iterate through a multidimensional array. inline bool NextIndex(const int num_dims, const int* dims, int* current) { if (num_dims == 0) { @@ -764,7 +770,8 @@ struct DepthToSpaceParams { struct DepthwiseParams { PaddingType padding_type; PaddingValues padding_values; - int16 stride; + int16 stride_width; + int16 stride_height; int16 depth_multiplier; // uint8 inference params. // TODO(b/65838351): Use smaller types if appropriate. -- GitLab From 7023196f46e92cb393dad03faff294b370dfd786 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 14 Sep 2018 12:20:23 -0700 Subject: [PATCH 0213/1357] Automated rollback of commit 5f28bab20d303e9f815bbe8611c24b7f751e6f9e PiperOrigin-RevId: 213023382 --- tensorflow/python/ops/math_ops.py | 34 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 7c59232e40..acd5a32e82 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2903,24 +2903,22 @@ def tensordot(a, b, axes, name=None): free_dims_static = None shape_a = array_ops.shape(a) rank_a = array_ops.rank(a) - # TODO(b/115583659): Automate this. - with ops.device("/cpu:0"): - axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") - axes = cast(axes >= 0, dtypes.int32) * axes + cast( - axes < 0, dtypes.int32) * ( - axes + rank_a) - free, _ = array_ops.setdiff1d(range(rank_a), axes) - free_dims = array_ops.gather(shape_a, free) - axes_dims = array_ops.gather(shape_a, axes) - prod_free_dims = reduce_prod(free_dims) - prod_axes_dims = reduce_prod(axes_dims) - perm = array_ops.concat([axes_dims, free_dims], 0) - if flipped: - perm = array_ops.concat([axes, free], 0) - new_shape = array_ops.stack([prod_axes_dims, prod_free_dims]) - else: - perm = array_ops.concat([free, axes], 0) - new_shape = array_ops.stack([prod_free_dims, prod_axes_dims]) + axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") + axes = cast(axes >= 0, dtypes.int32) * axes + cast( + axes < 0, dtypes.int32) * ( + axes + rank_a) + free, _ = array_ops.setdiff1d(range(rank_a), axes) + free_dims = array_ops.gather(shape_a, free) + axes_dims = array_ops.gather(shape_a, axes) + prod_free_dims = reduce_prod(free_dims) + prod_axes_dims = reduce_prod(axes_dims) + perm = array_ops.concat([axes_dims, free_dims], 0) + if flipped: + perm = array_ops.concat([axes, free], 0) + new_shape = array_ops.stack([prod_axes_dims, prod_free_dims]) + else: + perm = 
array_ops.concat([free, axes], 0) + new_shape = array_ops.stack([prod_free_dims, prod_axes_dims]) reshaped_a = array_ops.reshape(array_ops.transpose(a, perm), new_shape) return reshaped_a, free_dims, free_dims_static -- GitLab From cba65fbcecb828a3e6e7743f7e784c7d08d37ffb Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Fri, 14 Sep 2018 12:34:21 -0700 Subject: [PATCH 0214/1357] Define PreferBlockAccess enum to prepare for Eigen upgrade. PiperOrigin-RevId: 213025676 --- tensorflow/core/kernels/eigen_volume_patch.h | 1 + tensorflow/core/kernels/mirror_pad_op.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/eigen_volume_patch.h b/tensorflow/core/kernels/eigen_volume_patch.h index a3d795813d..80ab745bfe 100644 --- a/tensorflow/core/kernels/eigen_volume_patch.h +++ b/tensorflow/core/kernels/eigen_volume_patch.h @@ -43,6 +43,7 @@ struct CustomTensorEvaluator { IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = NumDims == 6, RawAccess = false diff --git a/tensorflow/core/kernels/mirror_pad_op.h b/tensorflow/core/kernels/mirror_pad_op.h index cc4b6941b9..62aa7d5c29 100644 --- a/tensorflow/core/kernels/mirror_pad_op.h +++ b/tensorflow/core/kernels/mirror_pad_op.h @@ -103,6 +103,7 @@ struct TensorEvaluator, IsAligned = false, PacketAccess = TensorEvaluator::PacketAccess, BlockAccess = false, + PreferBlockAccess = false, Layout = TensorEvaluator::Layout, CoordAccess = true, RawAccess = false -- GitLab From 9da83f0701bcece95372ee8da09f886dfd2fa2a1 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 14 Sep 2018 12:36:51 -0700 Subject: [PATCH 0215/1357] Make ReLU layer use nn.leaky_relu when appropriate. 
PiperOrigin-RevId: 213026080 --- tensorflow/python/keras/backend.py | 6 +++++- tensorflow/python/keras/backend_test.py | 3 ++- .../python/keras/layers/advanced_activations.py | 12 +++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 529b07dc12..5e1722ba20 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -3459,14 +3459,18 @@ def relu(x, alpha=0., max_value=None, threshold=0): Returns: A tensor. """ - clip_max = max_value is not None if alpha != 0.: + if max_value is None and threshold == 0: + return nn.leaky_relu(x, alpha=alpha) + if threshold != 0: negative_part = nn.relu(-x + threshold) else: negative_part = nn.relu(-x) + clip_max = max_value is not None + if threshold != 0: # computes x for x > threshold else 0 x = x * math_ops.cast(math_ops.greater(x, threshold), floatx()) diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 2f271c4f50..ab71589940 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -522,8 +522,9 @@ class BackendLinearAlgebraTest(test.TestCase): relu_op = keras.backend.relu(x) self.assertAllClose(keras.backend.eval(relu_op), [[0, 0], [2, 7]]) - # alpha + # alpha (leaky relu used) relu_op = keras.backend.relu(x, alpha=0.5) + self.assertTrue('LeakyRelu' in relu_op.name) self.assertAllClose(keras.backend.eval(relu_op), [[-2, 0], [2, 7]]) # max_value < some elements diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py index 61ab69c16f..4ab786a184 100644 --- a/tensorflow/python/keras/layers/advanced_activations.py +++ b/tensorflow/python/keras/layers/advanced_activations.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.keras import 
activations from tensorflow.python.keras import backend as K from tensorflow.python.keras import constraints from tensorflow.python.keras import initializers @@ -268,7 +267,7 @@ class Softmax(Layer): self.axis = axis def call(self, inputs): - return activations.softmax(inputs, axis=self.axis) + return K.softmax(inputs, axis=self.axis) def get_config(self): config = {'axis': self.axis} @@ -322,11 +321,10 @@ class ReLU(Layer): def call(self, inputs): # alpha is used for leaky relu slope in activations instead of # negative_slope. - return activations.relu( - inputs, - alpha=self.negative_slope, - max_value=self.max_value, - threshold=self.threshold) + return K.relu(inputs, + alpha=self.negative_slope, + max_value=self.max_value, + threshold=self.threshold) def get_config(self): config = { -- GitLab From a9a1d5a673ad085777e6a8b14cbe39a427493e51 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 14 Sep 2018 12:44:31 -0700 Subject: [PATCH 0216/1357] Add --config=v2 option to the .bazelrc file. PiperOrigin-RevId: 213027176 --- configure.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.py b/configure.py index 52a513779e..e9d162fbd2 100644 --- a/configure.py +++ b/configure.py @@ -1572,6 +1572,9 @@ def main(): if is_windows(): set_windows_build_flags(environ_cp) + # Add a config option to build TensorFlow 2.0 API. + write_to_bazelrc('build:v2 --define=tf_api_version=2') + if get_var( environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', False, -- GitLab From 91fa9ad89589b7d20200bb19cf3c271d71fa3bdc Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 14 Sep 2018 12:52:57 -0700 Subject: [PATCH 0217/1357] Populate custom name in registration. 
PiperOrigin-RevId: 213028338 --- tensorflow/contrib/lite/mutable_op_resolver.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/lite/mutable_op_resolver.cc b/tensorflow/contrib/lite/mutable_op_resolver.cc index 8ee63d2a02..d7c0181720 100644 --- a/tensorflow/contrib/lite/mutable_op_resolver.cc +++ b/tensorflow/contrib/lite/mutable_op_resolver.cc @@ -34,6 +34,7 @@ void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op, int min_version, int max_version) { for (int version = min_version; version <= max_version; ++version) { TfLiteRegistration new_registration = *registration; + new_registration.custom_name = nullptr; new_registration.builtin_code = op; new_registration.version = version; auto op_key = std::make_pair(op, version); @@ -47,6 +48,7 @@ void MutableOpResolver::AddCustom(const char* name, for (int version = min_version; version <= max_version; ++version) { TfLiteRegistration new_registration = *registration; new_registration.builtin_code = BuiltinOperator_CUSTOM; + new_registration.custom_name = name; new_registration.version = version; auto op_key = std::make_pair(name, version); custom_ops_[op_key] = new_registration; -- GitLab From 0981b26dd4f5d1b9b3baaecbb61533a658a95c2a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 14 Sep 2018 13:31:24 -0700 Subject: [PATCH 0218/1357] Disable the flaky test case in timeline_test PiperOrigin-RevId: 213034078 --- tensorflow/python/client/timeline_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index 03effde098..281d7f2e2b 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -134,7 +134,7 @@ class TimelineTest(test.TestCase): ctf = tl.generate_chrome_trace_format() self._validateTrace(ctf) - def testAnalysisAndAllocations(self): + def disabled_testAnalysisAndAllocations(self): run_options = config_pb2.RunOptions( 
trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() -- GitLab From f104b477ab22d5bc71afa757ec0cdeaca8666909 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 13:49:13 -0700 Subject: [PATCH 0219/1357] Convert more kernel signatures to use runtime shapes. PiperOrigin-RevId: 213037039 --- .../internal/optimized/optimized_ops.h | 326 ++++++++++++------ 1 file changed, 220 insertions(+), 106 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 8962d830a3..2fa5d6445e 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -81,6 +81,7 @@ using reference_ops::Select; using reference_ops::SpaceToBatchND; using reference_ops::Split; using reference_ops::StridedSlice; +using reference_ops::TensorFlowSplit; using reference_ops::Transpose; // TODO(b/80247582) Remove this constant. 
@@ -183,6 +184,15 @@ ArrayMap MapAsArrayWithFirstDimAsRows(Scalar* data, return ArrayMap(data, rows, cols); } +template +ArrayMap MapAsArrayWithLastDimAsRows(Scalar* data, + const RuntimeShape& shape) { + const int dims_count = shape.DimensionsCount(); + const int rows = shape.Dims(dims_count - 1); + const int cols = FlatSizeSkipDim(shape, dims_count - 1); + return ArrayMap(data, rows, cols); +} + // Copied from tensorflow/core/framework/tensor_types.h template struct TTypes { @@ -3628,62 +3638,96 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, } } -inline void LstmCell(const float* input_data, const Dims<4>& input_dims, - const float* prev_activ_data, - const Dims<4>& prev_activ_dims, const float* weights_data, - const Dims<4>& weights_dims, const float* bias_data, - const Dims<4>& bias_dims, const float* prev_state_data, - const Dims<4>& prev_state_dims, float* output_state_data, - const Dims<4>& output_state_dims, float* output_activ_data, - const Dims<4>& output_activ_dims, float* concat_temp_data, - const Dims<4>& concat_temp_dims, float* activ_temp_data, - const Dims<4>& activ_temp_dims) { +inline void LstmCell( + const LstmCellParams& params, const RuntimeShape& unextended_input_shape, + const float* input_data, const RuntimeShape& unextended_prev_activ_shape, + const float* prev_activ_data, const RuntimeShape& weights_shape, + const float* weights_data, const RuntimeShape& unextended_bias_shape, + const float* bias_data, const RuntimeShape& unextended_prev_state_shape, + const float* prev_state_data, + const RuntimeShape& unextended_output_state_shape, float* output_state_data, + const RuntimeShape& unextended_output_activ_shape, float* output_activ_data, + const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data, + const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) { gemmlowp::ScopedProfilingLabel label("LstmCell"); - MatchingArraySize( // batches - input_dims, 3, prev_activ_dims, 3, 
prev_state_dims, 3, output_state_dims, - 3, output_activ_dims, 3); - MatchingArraySize( // height - input_dims, 2, prev_activ_dims, 2, prev_state_dims, 2, output_state_dims, - 2, output_activ_dims, 2); - MatchingArraySize( // width - input_dims, 1, prev_activ_dims, 1, prev_state_dims, 1, output_state_dims, - 1, output_activ_dims, 1); - TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1); - TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1); - const int input_depth = ArraySize(input_dims, 0); - const int prev_activ_depth = ArraySize(prev_activ_dims, 0); + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = + RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + + const int 
weights_dim_count = weights_shape.DimensionsCount(); + MatchingDim( // batches + input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, + output_state_shape, 0, output_activ_shape, 0); + MatchingDim( // height + input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1, + output_state_shape, 1, output_activ_shape, 1); + MatchingDim( // width + input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2, + output_state_shape, 2, output_activ_shape, 2); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); const int total_input_depth = prev_activ_depth + input_depth; - TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth); - TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3), - 1); + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), + total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); const int intern_activ_depth = - MatchingArraySize(weights_dims, 1, bias_dims, 0); - TFLITE_CHECK_EQ(intern_activ_depth % 4, 0); + MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), + intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); const int output_depth = - MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0, - output_state_dims, 0, output_activ_dims, 0); - TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4); + MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); // Concatenate prev_activ and input data together std::vector concat_input_arrays_data; - std::vector const*> concat_input_arrays_dims; + std::vector concat_input_arrays_shapes; concat_input_arrays_data.push_back(input_data); concat_input_arrays_data.push_back(prev_activ_data); - concat_input_arrays_dims.push_back(&input_dims); - concat_input_arrays_dims.push_back(&prev_activ_dims); - Concatenation( - 
0, &(concat_input_arrays_data[0]), &(concat_input_arrays_dims[0]), - concat_input_arrays_data.size(), concat_temp_data, concat_temp_dims); + concat_input_arrays_shapes.push_back(&input_shape); + concat_input_arrays_shapes.push_back(&prev_activ_shape); + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = concat_input_arrays_data.size(); + Concatenation(concat_params, &(concat_input_arrays_shapes[0]), + &(concat_input_arrays_data[0]), concat_temp_shape, + concat_temp_data); // Fully connected - FullyConnected( - concat_temp_data, concat_temp_dims, weights_data, weights_dims, bias_data, - bias_dims, activ_temp_data, activ_temp_dims); + tflite::FullyConnectedParams fc_params; + fc_params.float_activation_min = std::numeric_limits::lowest(); + fc_params.float_activation_max = std::numeric_limits::max(); + FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, + weights_data, bias_shape, bias_data, activ_temp_shape, + activ_temp_data); // Map raw arrays to Eigen arrays so we can use Eigen's optimized array // operations. 
ArrayMap activ_temp_map = - MapAsArrayWithFirstDimAsRows(activ_temp_data, activ_temp_dims); + MapAsArrayWithLastDimAsRows(activ_temp_data, activ_temp_shape); auto input_gate_sm = activ_temp_map.block(0 * output_depth, 0, output_depth, activ_temp_map.cols()); auto new_input_sm = activ_temp_map.block(1 * output_depth, 0, output_depth, @@ -3693,11 +3737,11 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, auto output_gate_sm = activ_temp_map.block(3 * output_depth, 0, output_depth, activ_temp_map.cols()); ArrayMap prev_state_map = - MapAsArrayWithFirstDimAsRows(prev_state_data, prev_state_dims); + MapAsArrayWithLastDimAsRows(prev_state_data, prev_state_shape); ArrayMap output_state_map = - MapAsArrayWithFirstDimAsRows(output_state_data, output_state_dims); + MapAsArrayWithLastDimAsRows(output_state_data, output_state_shape); ArrayMap output_activ_map = - MapAsArrayWithFirstDimAsRows(output_activ_data, output_activ_dims); + MapAsArrayWithLastDimAsRows(output_activ_data, output_activ_shape); // Combined memory state and final output calculation gemmlowp::ScopedProfilingLabel label2("MemoryStateAndFinalOutput"); @@ -3711,56 +3755,120 @@ inline void LstmCell(const float* input_data, const Dims<4>& input_dims, output_state_map.tanh(); } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void LstmCell(const float* input_data, const Dims<4>& input_dims, + const float* prev_activ_data, + const Dims<4>& prev_activ_dims, const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, const float* prev_state_data, + const Dims<4>& prev_state_dims, float* output_state_data, + const Dims<4>& output_state_dims, float* output_activ_data, + const Dims<4>& output_activ_dims, float* concat_temp_data, + const Dims<4>& concat_temp_dims, float* activ_temp_data, + const Dims<4>& activ_temp_dims) { + tflite::LstmCellParams op_params; + // Float LSTM cell does not need parameters to be set: leave untouched. + + LstmCell(op_params, DimsToShape(input_dims), input_data, + DimsToShape(prev_activ_dims), prev_activ_data, + DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims), + bias_data, DimsToShape(prev_state_dims), prev_state_data, + DimsToShape(output_state_dims), output_state_data, + DimsToShape(output_activ_dims), output_activ_data, + DimsToShape(concat_temp_dims), concat_temp_data, + DimsToShape(activ_temp_dims), activ_temp_data); +} + // Quantized LSTM cell. Currently just a copy of the reference impl in // reference_ops.h. See the big function comment there, not replicating it // here. 
template -void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, - const uint8* prev_activ_data_uint8, - const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8, - const Dims<4>& weights_dims, const int32* bias_data_int32, - const Dims<4>& bias_dims, const int16* prev_state_data_int16, - const Dims<4>& prev_state_dims, int16* output_state_data_int16, - const Dims<4>& output_state_dims, uint8* output_activ_data_uint8, - const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8, - const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16, - const Dims<4>& activ_temp_dims, int32 weights_zero_point, - int32 accum_multiplier, int accum_shift, - gemmlowp::GemmContext* gemm_context) { +inline void LstmCell( + const LstmCellParams& params, const RuntimeShape& unextended_input_shape, + const uint8* input_data_uint8, + const RuntimeShape& unextended_prev_activ_shape, + const uint8* prev_activ_data_uint8, const RuntimeShape& weights_shape, + const uint8* weights_data_uint8, const RuntimeShape& unextended_bias_shape, + const int32* bias_data_int32, + const RuntimeShape& unextended_prev_state_shape, + const int16* prev_state_data_int16, + const RuntimeShape& unextended_output_state_shape, + int16* output_state_data_int16, + const RuntimeShape& unextended_output_activ_shape, + uint8* output_activ_data_uint8, + const RuntimeShape& unextended_concat_temp_shape, + uint8* concat_temp_data_uint8, + const RuntimeShape& unextended_activ_temp_shape, + int16* activ_temp_data_int16, gemmlowp::GemmContext* gemm_context) { + int32 weights_zero_point = params.weights_zero_point; + int32 accum_multiplier = params.accum_multiplier; + int accum_shift = params.accum_shift; gemmlowp::ScopedProfilingLabel label( "LstmCell/quantized (8bit external, 16bit internal)"); + TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); + 
TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); + const RuntimeShape input_shape = + RuntimeShape::ExtendedShape(4, unextended_input_shape); + const RuntimeShape prev_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); + const RuntimeShape bias_shape = + RuntimeShape::ExtendedShape(4, unextended_bias_shape); + const RuntimeShape prev_state_shape = + RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); + const RuntimeShape output_state_shape = + RuntimeShape::ExtendedShape(4, unextended_output_state_shape); + const RuntimeShape output_activ_shape = + RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); + const RuntimeShape concat_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); + const RuntimeShape activ_temp_shape = + RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + // Gather dimensions information, and perform consistency checks. 
- const int outer_size = - MatchingFlatSizeSkipDim(input_dims, 0, prev_activ_dims, prev_state_dims, - output_state_dims, output_activ_dims); - TFLITE_CHECK_EQ(ArraySize(weights_dims, 2), 1); - TFLITE_CHECK_EQ(ArraySize(weights_dims, 3), 1); - const int input_depth = ArraySize(input_dims, 0); - const int prev_activ_depth = ArraySize(prev_activ_dims, 0); + const int weights_dim_count = weights_shape.DimensionsCount(); + const int outer_size = MatchingFlatSizeSkipDim( + input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape, + output_activ_shape); + const int input_depth = input_shape.Dims(3); + const int prev_activ_depth = prev_activ_shape.Dims(3); const int total_input_depth = prev_activ_depth + input_depth; - TFLITE_CHECK_EQ(ArraySize(weights_dims, 0), total_input_depth); - TFLITE_CHECK_EQ(MatchingArraySize(bias_dims, 1, bias_dims, 2, bias_dims, 3), - 1); + TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), + total_input_depth); const int intern_activ_depth = - MatchingArraySize(weights_dims, 1, bias_dims, 0); - TFLITE_CHECK_EQ(intern_activ_depth % 4, 0); + MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); + TFLITE_DCHECK_EQ(weights_shape.FlatSize(), + intern_activ_depth * total_input_depth); + TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); + TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); const int output_depth = - MatchingArraySize(prev_state_dims, 0, prev_activ_dims, 0, - output_state_dims, 0, output_activ_dims, 0); - TFLITE_CHECK_EQ(output_depth, intern_activ_depth / 4); - const int fc_batches = FlatSizeSkipDim(activ_temp_dims, 0); + MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, + 3, output_activ_shape, 3); + TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); + const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); const int fc_output_depth = - MatchingArraySize(weights_dims, 1, activ_temp_dims, 0); - const int fc_accum_depth = ArraySize(weights_dims, 0); - 
TFLITE_CHECK_EQ(fc_output_depth, 4 * output_depth); + MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); + const int fc_accum_depth = total_input_depth; + TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); // Depth-concatenate prev_activ and input data together. uint8 const* concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8}; - Dims<4> const* concat_input_arrays_dims[2] = {&input_dims, &prev_activ_dims}; - Concatenation( - 0, concat_input_arrays_data, concat_input_arrays_dims, 2, - concat_temp_data_uint8, concat_temp_dims); + const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape, + &prev_activ_shape}; + tflite::ConcatenationParams concat_params; + concat_params.axis = 3; + concat_params.inputs_count = 2; + Concatenation(concat_params, concat_input_arrays_shapes, + concat_input_arrays_data, concat_temp_shape, + concat_temp_data_uint8); // Implementation of the fully connected node inside the LSTM cell. // The operands are 8-bit integers, the accumulators are internally 32bit @@ -3770,11 +3878,10 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, bool gemm_already_performed = false; #ifdef GEMMLOWP_NEON if (fc_batches == 1 && !(fc_output_depth % 4) && !(fc_accum_depth % 8)) { - GEMVForLstmCell(DimsToShape(concat_temp_dims), concat_temp_data_uint8, - DimsToShape(weights_dims), weights_data_uint8, - weights_zero_point, DimsToShape(bias_dims), bias_data_int32, - accum_multiplier, accum_shift, DimsToShape(activ_temp_dims), - activ_temp_data_int16); + GEMVForLstmCell(concat_temp_shape, concat_temp_data_uint8, weights_shape, + weights_data_uint8, weights_zero_point, bias_shape, + bias_data_int32, accum_multiplier, accum_shift, + activ_temp_shape, activ_temp_data_int16); gemm_already_performed = true; } #endif @@ -3963,28 +4070,35 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, } } -template -void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, - 
int outputs_count, Scalar* const* output_data, - const Dims<4>* const* output_dims) { - gemmlowp::ScopedProfilingLabel label("TensorFlowSplit"); - TFLITE_DCHECK_GE(outputs_count, 1); - for (int i = 0; i < outputs_count; i++) { - MatchingFlatSizeSkipDim(*output_dims[i], 0, input_dims); - } - const int outer_size = FlatSizeSkipDim(input_dims, 0); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - // For now we don't have a model with a TensorFlowSplit - // with fused activation function. - TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); - const Scalar* input_ptr = input_data; - for (int k = 0; k < outer_size; k++) { - for (int i = 0; i < outputs_count; ++i) { - memcpy(output_data[i] + k * output_dims[i]->sizes[0], input_ptr, - output_dims[i]->sizes[0] * sizeof(Scalar)); - input_ptr += output_dims[i]->sizes[0]; - } - } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +template +void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, + const uint8* prev_activ_data_uint8, + const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8, + const Dims<4>& weights_dims, const int32* bias_data_int32, + const Dims<4>& bias_dims, const int16* prev_state_data_int16, + const Dims<4>& prev_state_dims, int16* output_state_data_int16, + const Dims<4>& output_state_dims, uint8* output_activ_data_uint8, + const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8, + const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16, + const Dims<4>& activ_temp_dims, int32 weights_zero_point, + int32 accum_multiplier, int accum_shift, + gemmlowp::GemmContext* gemm_context) { + tflite::LstmCellParams op_params; + op_params.weights_zero_point = weights_zero_point; + op_params.accum_multiplier = accum_multiplier; + op_params.accum_shift = accum_shift; + + LstmCell( + op_params, DimsToShape(input_dims), input_data_uint8, + DimsToShape(prev_activ_dims), prev_activ_data_uint8, + DimsToShape(weights_dims), weights_data_uint8, 
DimsToShape(bias_dims), + bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16, + DimsToShape(output_state_dims), output_state_data_int16, + DimsToShape(output_activ_dims), output_activ_data_uint8, + DimsToShape(concat_temp_dims), concat_temp_data_uint8, + DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context); } inline int NodeOffset(int b, int h, int w, int height, int width) { -- GitLab From 19d66a950e2091bb598c6a2d375e14208f5773b2 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 14 Sep 2018 14:07:14 -0700 Subject: [PATCH 0220/1357] Disable flaky gpu_base_test PiperOrigin-RevId: 213040362 --- tensorflow/contrib/tensorrt/BUILD | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 9e8979bce4..4ea7216ef2 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -444,7 +444,6 @@ cuda_py_test( cuda_py_tests( name = "tf_trt_integration_test", srcs = [ - "test/base_test.py", "test/batch_matmul_test.py", "test/biasadd_matmul_test.py", "test/binary_tensor_weight_broadcast_test.py", @@ -471,6 +470,26 @@ cuda_py_tests( ], ) +cuda_py_tests( + name = "base_test", + srcs = [ + "test/base_test.py", + ], + additional_deps = [ + ":tf_trt_integration_test_base", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_test_lib", + ], + tags = [ + "manual", + "no_cuda_on_cpu_tap", + "no_gpu", + "no_windows", + "nomac", + "notap", + ], +) + cc_library( name = "utils", srcs = ["convert/utils.cc"], -- GitLab From b5594e6121e902f8dd2d5127653a1ec5f97daccd Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Fri, 14 Sep 2018 14:15:05 -0700 Subject: [PATCH 0221/1357] Added TFE_OpSetAttrTensor() to eager C API. Also added some experimental C APIs for facilitate the use of eager C APIs in S4TF compiler. 
PiperOrigin-RevId: 213041780 --- tensorflow/c/c_api_experimental.cc | 50 ++++++++++++++++++++++++++++++ tensorflow/c/c_api_experimental.h | 9 ++++++ tensorflow/c/eager/c_api.cc | 7 +++++ tensorflow/c/eager/c_api.h | 5 +++ 4 files changed, 71 insertions(+) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index c195c9e01c..3bcc62cf2d 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -8705,3 +8705,53 @@ TFE_TensorHandle* TFE_DequeueVariantTensor(TF_Session* session, int tensor_id, return createTFEDequeue(ctx, TF_VARIANT, queue, status); } + +static void CheckOk(TF_Status* status) { + CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); +} + +void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) { + auto* status = TF_NewStatus(); + TF_Tensor* t = TFE_TensorHandleResolve(handle, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + + tensorflow::Tensor dst; + TF_CHECK_OK(TF_TensorToTensor(t, &dst)); + LOG(INFO) << dst.DebugString(); + + TF_DeleteTensor(t); + TF_DeleteStatus(status); +} + +TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx) { + // Intentionally LOG into INFO below for ease of debugging. 
+ VLOG(1) << "TFE_RunConstOp called"; + + auto* status = TF_NewStatus(); + auto* op = TFE_NewOp(ctx, "Const", status); + CheckOk(status); + TFE_OpSetAttrType(op, "dtype", TF_FLOAT); + + auto* tensor = + TF_AllocateTensor(TF_FLOAT, /*shape.data()*/ nullptr, /*shape.size()*/ 0, + TF_DataTypeSize(TF_FLOAT) * 1); + auto* ptr = reinterpret_cast(TF_TensorData(tensor)); + *reinterpret_cast(ptr) = 17.0; + + TFE_OpSetAttrTensor(op, "value", tensor, status); + CheckOk(status); + TF_DeleteTensor(tensor); + VLOG(1) << "New op created"; + + TFE_TensorHandle* retval; + int num_retvals = 1; + TFE_Execute(op, &retval, &num_retvals, status); + CheckOk(status); + CHECK_EQ(num_retvals, 1); + VLOG(1) << "Op executed"; + + TFE_DeleteOp(op); + TF_DeleteStatus(status); + + return retval; +} diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 522c91f67e..a3ca847d96 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -174,6 +174,15 @@ TF_CAPI_EXPORT extern void TFE_EnqueueVariantTensor(TF_Session* session, TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueVariantTensor( TF_Session* session, int tensor_id, TF_Status* status); +// Prints `handle` in a human readable format to standard output for debugging. +TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString( + TFE_TensorHandle* handle); + +// Returns a const scalar tensor. +// Caller owns both the input and the output tensor handles. +// TODO: Remove this API with hard-coded tensor computation. 
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 349d9bcd7c..6f86ea80e5 100755 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -567,6 +567,13 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name, op->operation.MutableAttrs()->Set(attr_name, attr_value); } +void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor, + TF_Status* status) { + tensorflow::Tensor t; + status->status = TF_TensorToTensor(tensor, &t); + if (status->status.ok()) op->operation.MutableAttrs()->Set(attr_name, t); +} + void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name, const void* const* values, const size_t* lengths, int num_values) { diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 337447eec9..a87d73ec8e 100755 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -311,6 +311,11 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name, const TFE_Op* value); +TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op, + const char* attr_name, + TF_Tensor* tensor, + TF_Status* status); + TF_CAPI_EXPORT extern void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name, const void* const* values, -- GitLab From 84d8423bececc26f127a1c40c00588463d8d1650 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 14 Sep 2018 15:00:55 -0700 Subject: [PATCH 0222/1357] Generalize TransformFilter method in preparation of NHWC Conv support PiperOrigin-RevId: 213049674 --- .../fused_conv2d_bias_activation_op.cc | 3 +- tensorflow/core/kernels/conv_2d.h | 45 +++++++---- .../core/kernels/conv_grad_filter_ops.cc | 3 +- .../core/kernels/conv_grad_input_ops.cc | 6 +- tensorflow/core/kernels/conv_grad_ops_3d.cc | 6 +- tensorflow/core/kernels/conv_ops.cc | 7 +- tensorflow/core/kernels/conv_ops_3d.cc | 6 +- 
tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | 81 ++++++++----------- 8 files changed, 84 insertions(+), 73 deletions(-) diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 716bb87e38..e9e6464d06 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -497,7 +497,8 @@ void LaunchFusedConv2DBiasActivationOp:: FORMAT_OIHW, filter_param.shape(), FORMAT_HWIO), &maybe_transformed_filter)); functor::TransformFilter()( - ctx->eigen_device(), To32Bit(filter_param.tensor()), + ctx->eigen_device(), FORMAT_OIHW, + To32Bit(filter_param.tensor()), To32Bit(maybe_transformed_filter.tensor())); filter = &maybe_transformed_filter; } diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h index de9b69828e..639c3062cc 100644 --- a/tensorflow/core/kernels/conv_2d.h +++ b/tensorflow/core/kernels/conv_2d.h @@ -137,17 +137,16 @@ struct MatMulConvFunctor { } }; -// Shuffles a filter tensor from: -// [, in, out] -// to: -// [out, in, ] +// Shuffles a filter tensor from TensorFlow format HWIO to dst_filter_format. +// +// Note: Currently OIHW is the only supported destination format. Support for +// OHWI format will be added in a follow-up change. template struct TransformFilter { - void operator()(const Device& d, + void operator()(const Device& d, FilterTensorFormat dst_filter_format, typename TTypes::ConstTensor in, typename TTypes::Tensor out) { - // We want a 3, 2, 0, 1 shuffle. Merge the spatial dimensions together - // to speed up the shuffle operation. + // Merge the spatial dimensions together to speed up the shuffle operation. 
Eigen::DSizes merged_dims; merged_dims[0] = in.dimension(0); // spatial dimensions for (int i = 1; i < NDIMS - 2; ++i) { @@ -156,16 +155,30 @@ struct TransformFilter { merged_dims[1] = in.dimension(NDIMS - 2); // input filters merged_dims[2] = in.dimension(NDIMS - 1); // output filters + CHECK(dst_filter_format == FORMAT_OIHW) + << "Unsupported destination filter format: " + << ToString(dst_filter_format); + // Source filter format is FORMAT_HWIO and spatial dimensions HW are merged + // in the beginning. + Eigen::DSizes shuffling_perm = + Eigen::DSizes(2, 1, 0); + Eigen::DSizes expanded_dims; - expanded_dims[0] = in.dimension(NDIMS - 1); // output filters - expanded_dims[1] = in.dimension(NDIMS - 2); // input filters - for (int i = 0; i < NDIMS - 2; ++i) { // spatial dimensions - expanded_dims[i + 2] = in.dimension(i); + int out_index = 0; + for (int merged_dim = 0; merged_dim < merged_dims.rank(); ++merged_dim) { + if (shuffling_perm[merged_dim] == 0) { + for (int spatial_dim = 0; spatial_dim < NDIMS - 2; ++spatial_dim) { + expanded_dims[out_index++] = in.dimension(spatial_dim); + } + } else { + constexpr int kLastSpatialDim = NDIMS - 3; + expanded_dims[out_index++] = + in.dimension(kLastSpatialDim + shuffling_perm[merged_dim]); + } } - out.device(d) = in.reshape(merged_dims) - .shuffle(Eigen::DSizes(2, 1, 0)) - .reshape(expanded_dims); + out.device(d) = + in.reshape(merged_dims).shuffle(shuffling_perm).reshape(expanded_dims); } }; @@ -282,7 +295,9 @@ struct SwapDimension0And2InTensor3 { const gtl::ArraySlice& input_dims, T* out); }; -// Reverses the effect of TransformFilter above. +// Transforms back filter from OIHW to HWOI format to reverse effect of +// TransformFilter above. +// TODO(hinsu): Support reverse transformation from filter format OHWI as well. 
template struct ReverseTransformFilter { void operator()(const Device& d, typename TTypes::ConstTensor in, diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index 63b1bcda43..9e86a16b66 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -1018,7 +1018,8 @@ namespace functor { extern template struct InflatePadAndShuffle; \ template <> \ void TransformFilter::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ typename TTypes::Tensor out); \ extern template struct TransformFilter; \ template <> \ diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index d664a11e73..43bb5ea56c 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -901,7 +901,8 @@ void LaunchConv2DBackpropInputOp::operator()( &transformed_filter)); functor::TransformFilter()( - ctx->eigen_device(), To32Bit(filter.tensor()), + ctx->eigen_device(), FORMAT_OIHW, + To32Bit(filter.tensor()), To32Bit(transformed_filter.tensor())); Tensor transformed_out_backprop; @@ -1090,7 +1091,8 @@ namespace functor { extern template struct InflatePadAndShuffle; \ template <> \ void TransformFilter::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ typename TTypes::Tensor out); \ extern template struct TransformFilter; \ template <> \ diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index d26b86c712..bab91f5e86 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -1054,7 +1054,8 @@ namespace functor { #define DECLARE_GPU_SPEC(T) \ 
template <> \ void TransformFilter::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ typename TTypes::Tensor out); \ template <> \ void ReverseTransformFilter::operator()( \ @@ -1287,7 +1288,8 @@ class Conv3DBackpropInputOp : public OpKernel { dims.filter_size(1), dims.filter_size(2)}), &transformed_filter)); functor::TransformFilter()( - context->eigen_device(), To32Bit(filter.tensor()), + context->eigen_device(), FORMAT_OIHW, + To32Bit(filter.tensor()), To32Bit(transformed_filter.tensor())); // Shape: batch, filters, z, y, x. diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index ef692418d6..6f5c8d8461 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -680,9 +680,9 @@ void LaunchConv2DOp::operator()( TensorShape({filter.dim_size(3), filter.dim_size(2), filter.dim_size(0), filter.dim_size(1)}), &transformed_filter)); - functor::TransformFilter()( - ctx->eigen_device(), To32Bit(filter.tensor()), + ctx->eigen_device(), FORMAT_OIHW, + To32Bit(filter.tensor()), To32Bit(transformed_filter.tensor())); Tensor transformed_output; @@ -823,7 +823,8 @@ namespace functor { extern template struct MatMulConvFunctor; \ template <> \ void TransformFilter::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ typename TTypes::Tensor out); \ extern template struct TransformFilter; \ template <> \ diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index a1eed4e68c..5c2b88924b 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -386,7 +386,8 @@ struct LaunchConvOp { // filter: [x, y, z, in, out] // t_filter: [out, in, x, y, z] functor::TransformFilter()( - ctx->eigen_device(), 
To32Bit(filter.tensor()), + ctx->eigen_device(), FORMAT_OIHW, + To32Bit(filter.tensor()), To32Bit(transformed_filter.tensor())); Tensor transformed_output; @@ -514,7 +515,8 @@ namespace functor { #define DECLARE_GPU_SPEC(T) \ template <> \ void TransformFilter::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ typename TTypes::Tensor out); \ template <> \ void ReverseTransformFilter::operator()( \ diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc index a5fa48f85e..46167db3a2 100644 --- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc +++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc @@ -170,51 +170,33 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index FlatToTensorIndex( return tensor_index; } -// A Cuda custom kernel that swaps dimension-0 and dimension-2 of a 3D tensor. -template -__global__ void SwapDimension0And2InTensor3Simple(int nthreads, const T* input, - Dimension<3> input_dims, - T* output) { - Dimension<3> output_dims; - output_dims[0] = input_dims[2]; - output_dims[1] = input_dims[1]; - output_dims[2] = input_dims[0]; - - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int output_index = index; - - Index<3> output_tensor_index = FlatToTensorIndex(output_index, output_dims); - - Index<3> input_tensor_index; - input_tensor_index[0] = output_tensor_index[2]; - input_tensor_index[1] = output_tensor_index[1]; - input_tensor_index[2] = output_tensor_index[0]; - - int input_index = TensorIndexToFlat(input_tensor_index, input_dims); - - output[output_index] = - maybe_conj::run(ldg(input + input_index)); - } -} - -// A Cuda custom kernel that swaps dimension-1 and dimension-2 of a 3D tensor. 
-template -__global__ void SwapDimension1And2InTensor3Simple(int nthreads, const T* input, - Dimension<3> input_dims, - T* output) { +// A simple CUDA custom kernel to shuffle dimensions of a 3D tensor according to +// the given shuffle permutation in template parameters. Shuffle permutation +// shuffles dimensions such that input dimension 0 goes to sp0, +// 1 goes to sp1 and 2 goes to sp2. For example, shuffle permutation <2, 0, 1> +// will populate output so that input[x][y][z] is equal to (*output)[y][z][x]. +// +// Requires that nthreads is equal to the total number of elements in the input +// tensor. +template +__global__ void ShuffleInTensor3Simple(int nthreads, const T* input, + Dimension<3> input_dims, T* output) { Dimension<3> output_dims; - output_dims[0] = input_dims[0]; - output_dims[1] = input_dims[2]; - output_dims[2] = input_dims[1]; - - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int output_index = index; + output_dims[sp0] = input_dims[0]; + output_dims[sp1] = input_dims[1]; + output_dims[sp2] = input_dims[2]; + + // Iterate over output as opposed to iterating over input for better + // performance. Iterating over output will generate sequential writes and + // random reads that performs better compared to sequential reads and random + // writes. 
+ CUDA_1D_KERNEL_LOOP(output_index, nthreads) { Index<3> output_tensor_index = FlatToTensorIndex(output_index, output_dims); Index<3> input_tensor_index; - input_tensor_index[0] = output_tensor_index[0]; - input_tensor_index[1] = output_tensor_index[2]; - input_tensor_index[2] = output_tensor_index[1]; + input_tensor_index[0] = output_tensor_index[sp0]; + input_tensor_index[1] = output_tensor_index[sp1]; + input_tensor_index[2] = output_tensor_index[sp2]; int input_index = TensorIndexToFlat(input_tensor_index, input_dims); @@ -439,7 +421,7 @@ __global__ void PadInputCustomKernelNCHW(int nthreads, const T* input, template struct TransformFilter { typedef GPUDevice Device; - void operator()(const Device& d, + void operator()(const Device& d, FilterTensorFormat dst_filter_format, typename TTypes::ConstTensor in, typename TTypes::Tensor out) { Dimension<3> combined_dims; @@ -450,13 +432,18 @@ struct TransformFilter { combined_dims[1] = in.dimension(NDIMS - 2); // input filters combined_dims[2] = in.dimension(NDIMS - 1); // output filters CudaLaunchConfig config = GetCudaLaunchConfig(out.size(), d); - SwapDimension0And2InTensor3Simple + + CHECK(dst_filter_format == FORMAT_OIHW) + << "Unsupported output layout: " << ToString(dst_filter_format); + + ShuffleInTensor3Simple <<>>( config.virtual_thread_count, in.data(), combined_dims, out.data()); } }; -// Converts Cudnn filter format back to TensorFlow filter format. +// Converts Cudnn filter format OIHW back to TensorFlow filter format HWIO. +// TODO(hinsu): Support reverse transformation from filter format OHWI as well. 
template struct ReverseTransformFilter { typedef GPUDevice Device; @@ -470,7 +457,7 @@ struct ReverseTransformFilter { combined_dims[2] *= in.dimension(i); } CudaLaunchConfig config = GetCudaLaunchConfig(out.size(), d); - SwapDimension0And2InTensor3Simple + ShuffleInTensor3Simple <<>>( config.virtual_thread_count, in.data(), combined_dims, out.data()); } @@ -937,7 +924,7 @@ void RunSwapDimension1And2InTensor3(const GPUDevice& d, const T* input, } else { int total_element_count = input_dims[0] * input_dims[1] * input_dims[2]; CudaLaunchConfig config = GetCudaLaunchConfig(total_element_count, d); - SwapDimension1And2InTensor3Simple + ShuffleInTensor3Simple <<>>( config.virtual_thread_count, input, input_dims, output); } @@ -969,7 +956,7 @@ struct SwapDimension0And2InTensor3 { static_cast(combined_dims[2])}; size_t total_size = combined_dims[0] * combined_dims[1] * combined_dims[2]; CudaLaunchConfig config = GetCudaLaunchConfig(total_size, d); - SwapDimension0And2InTensor3Simple + ShuffleInTensor3Simple <<>>( config.virtual_thread_count, in, input_dims, out); } -- GitLab From ceb72bcdbf90fd23204b26f8e43afbd3c0a46563 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 14 Sep 2018 15:16:09 -0700 Subject: [PATCH 0223/1357] [TF:XLA] Remove special base case from BatchDot that has been redundant ever since xla::DotGeneral was added. PiperOrigin-RevId: 213052269 --- tensorflow/compiler/tf2xla/lib/batch_dot.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/compiler/tf2xla/lib/batch_dot.cc b/tensorflow/compiler/tf2xla/lib/batch_dot.cc index 64f2d781a6..5400e8834c 100644 --- a/tensorflow/compiler/tf2xla/lib/batch_dot.cc +++ b/tensorflow/compiler/tf2xla/lib/batch_dot.cc @@ -100,16 +100,6 @@ xla::XlaOp BatchDot(xla::XlaOp x, xla::XlaOp y, bool transpose_x, precision_proto.add_operand_precision(precision); precision_proto.add_operand_precision(precision); - // If there are no batch dimensions, use a regular Dot. 
- // TODO(b/69062148) Remove this code when Dot emitters can be passed - // dimensions to transpose directly (i.e. without requiring a Transpose - // HLO). - if (batch_dimension_numbers.empty()) { - auto lhs = transpose_x ? xla::Transpose(x, {1, 0}) : x; - auto rhs = transpose_y ? xla::Transpose(y, {1, 0}) : y; - return xla::Dot(lhs, rhs, &precision_proto); - } - xla::DotDimensionNumbers dot_dnums; dot_dnums.add_lhs_contracting_dimensions(x_inner_dim); dot_dnums.add_rhs_contracting_dimensions(y_inner_dim); -- GitLab From 98342d8bea440c960a3a08bf3f27df737b2b2b11 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 14 Sep 2018 15:23:31 -0700 Subject: [PATCH 0224/1357] Disable flaky keras_test. PiperOrigin-RevId: 213053512 --- tensorflow/contrib/distribute/python/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index aaecbb0eb1..f72b827e04 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -731,9 +731,12 @@ cuda_py_test( ":keras_test_lib", ], tags = [ + "manual", "multi_and_single_gpu", + "no_gpu", "no_pip", "no_windows_gpu", + "notap", "notsan", ], ) -- GitLab From 9eba75e54e87aa00efae482c69797794d7020950 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Fri, 14 Sep 2018 16:08:40 -0700 Subject: [PATCH 0225/1357] Refactored some of the metrics code in compile function for better readability. - Logic change: Moved getting metric name and function out of the training/eval loops in eager mode - Moved setting metric attributes on the model out the function which calls metric functions. 
PiperOrigin-RevId: 213060143 --- tensorflow/python/keras/engine/training.py | 233 +++++++++--------- .../python/keras/engine/training_utils.py | 64 ++++- 2 files changed, 172 insertions(+), 125 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index fed07c4120..dc464c02b6 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -145,32 +145,34 @@ class Model(Network): if i not in skip_target_weighing_indices ] - def _get_metric_name(self, metric, output_index, weighted=False): - """Returns the metric name corresponding to the given metric input. + def _cache_output_metric_attributes(self, metrics, weighted_metrics): + """Caches metric name and function attributes for every model output.""" + output_shapes = [ + None if output is None else output.get_shape().as_list() + for output in self.outputs + ] + self._per_output_metrics = training_utils.collect_per_output_metric_info( + metrics, self.output_names, output_shapes, self.loss_functions) + self._per_output_weighted_metrics = \ + training_utils.collect_per_output_metric_info( + weighted_metrics, self.output_names, output_shapes, + self.loss_functions, self.sample_weights) + + def _add_unique_metric_name(self, metric_name, output_index): + """Makes the metric name unique and adds it to the model's metric name list. + + If there are multiple outputs for which the metrics are calculated, the + metric names have to be made unique by appending an integer. Arguments: - metric: Metric function name or reference. - output_index: Index of the current output. - weighted: Boolean indicating if the given metric is weighted. + metric_name: Metric name that corresponds to the metric specified by the + user. For example: 'acc'. + output_index: The index of the model output for which the metric name is + being added. Returns: - A metric name. 
+ string, name of the model's unique metric name """ - metric_name_prefix = 'weighted_' if weighted else '' - if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): - if metric in ('accuracy', 'acc'): - suffix = 'acc' - elif metric in ('crossentropy', 'ce'): - suffix = 'ce' - else: - metric_fn = metrics_module.get(metric) - # Get metric name as string - if hasattr(metric_fn, 'name'): - suffix = metric_fn.name - else: - suffix = metric_fn.__name__ - metric_name = metric_name_prefix + suffix - if len(self.output_names) > 1: metric_name = '%s_%s' % (self.output_names[output_index], metric_name) j = 1 @@ -181,24 +183,54 @@ class Model(Network): return metric_name + def _init_metric_attributes(self): + """Initialized model metric attributes.""" + self.metrics_names = ['loss'] + self.metrics_tensors = [] + self.metrics_updates = [] + self.stateful_metric_names = [] + self.stateful_metric_functions = [] + + def _set_per_output_metric_attributes(self, metrics_dict, output_index): + """Sets the metric attributes on the model for the given output. + + Arguments: + metrics_dict: A dict with metric names as keys and metric fns as values. + output_index: The index of the model output for which the metric + attributes are added. + """ + for metric_name, metric_fn in metrics_dict.items(): + metric_name = self._add_unique_metric_name(metric_name, output_index) + # Keep track of metric name. + self.metrics_names.append(metric_name) + + # Keep track of stateful metric attributes (name and metric function). 
+ if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful: + self.stateful_metric_names.append(metric_name) + self.stateful_metric_functions.append(metric_fn) + + def _set_metric_attributes(self, outputs, skip_target_indices=None): + """Sets the metric attributes on the model for all the model outputs.""" + skip_target_indices = skip_target_indices or [] + for i in range(len(outputs)): + if i in skip_target_indices: + continue + self._set_per_output_metric_attributes(self._per_output_metrics[i], i) + self._set_per_output_metric_attributes( + self._per_output_weighted_metrics[i], i) + def _handle_per_output_metrics(self, - metrics, + metrics_dict, y_true, y_pred, - output_index, - output_shape, - loss_fn, mask, weights=None): - """Calls metric functions and sets metric attributes for a single output. + """Calls metric functions for a single output. Arguments: - metrics: List of metrics. + metrics_dict: A dict with metric names as keys and metric fns as values. y_true: Target output. y_pred: Predicted output. - output_index: Index of the current output. - output_shape: Shape of the current output. - loss_fn: Loss function corresponding to the current output. mask: Computed mask value for the current output. weights: Weights to be applied on the current output. @@ -206,71 +238,45 @@ class Model(Network): A list of metric result tensors. """ metric_results = [] - for metric in metrics: - metric_fn = training_utils.get_metric_function( - metric, output_shape=output_shape, loss_fn=loss_fn) - - if (context.executing_eagerly() and y_true is not None and - y_pred is not None): - # In eager mode, when executing metric_fn during training, we do not - # need to generate unique metric name and add it to the model - # as we have done that during compile already. 
- prefix = 'weighted_' if weights is not None else '' - suffix = metric_fn.name if hasattr(metric_fn, - 'name') else metric_fn.__name__ - metric_name = prefix + suffix - else: - # Get metric name that is to be added to the model. - metric_name = self._get_metric_name( - metric, output_index, weighted=weights is not None) - # Keep track of metric name. - self.metrics_names.append(metric_name) - - # Keep track of stateful metric attributes (name and metric function). - if isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful: - self.stateful_metric_names.append(metric_name) - self.stateful_metric_functions.append(metric_fn) - + for metric_name, metric_fn in metrics_dict.items(): with K.name_scope(metric_name): - # If both outputs and targets are available, call the metric function. - if y_true is not None and y_pred is not None: - if isinstance(metric_fn, metrics_module.Metric): - # Call the stateful metric function. - if mask is not None: - mask = math_ops.cast(mask, y_pred.dtype) - # Update weights with mask. - if weights is None: - weights = mask - else: - # Update shape of weights if possible before adding mask. - # Update dimensions of weights to match with mask if possible. - mask, _, weights = metrics_module.squeeze_or_expand_dimensions( - mask, None, weights) - try: - # Broadcast weights if possible. - weights = weights_broadcast_ops.broadcast_weights( - weights, mask) - except ValueError: - pass - # TODO(psv): Handle case when mask and weight shapes are not - # compatible. - weights *= mask - - metric_result = metric_fn(y_true, y_pred, weights) - else: - # Call the stateless metric function. - weighted_metric_fn = training_utils.weighted_masked_objective( - metric_fn) - metric_result = weighted_metric_fn( - y_true, y_pred, weights=weights, mask=mask) - - if not context.executing_eagerly(): - # Keep track of metric result tensor. 
- self.metrics_tensors.append(metric_result) - metric_results.append(metric_result) - - if (isinstance(metric_fn, base_layer.Layer) and metric_fn.stateful and - not context.executing_eagerly()): + if isinstance(metric_fn, metrics_module.Metric): + # Call the stateful metric function. + if mask is not None: + mask = math_ops.cast(mask, y_pred.dtype) + # Update weights with mask. + if weights is None: + weights = mask + else: + # Update shape of weights if possible before adding mask. + # Update dimensions of weights to match with mask if possible. + mask, _, weights = metrics_module.squeeze_or_expand_dimensions( + mask, None, weights) + try: + # Broadcast weights if possible. + weights = weights_broadcast_ops.broadcast_weights(weights, mask) + except ValueError: + pass + # TODO(psv): Handle case when mask and weight shapes are not + # compatible. + weights *= mask + + metric_result = metric_fn(y_true, y_pred, weights) + else: + # Call the stateless metric function. + weighted_metric_fn = training_utils.weighted_masked_objective( + metric_fn) + metric_result = weighted_metric_fn( + y_true, y_pred, weights=weights, mask=mask) + + if not context.executing_eagerly(): + # Keep track of metric result tensor. + self.metrics_tensors.append(metric_result) + + metric_results.append(metric_result) + is_stateful = isinstance(metric_fn, + base_layer.Layer) and metric_fn.stateful + if is_stateful and not context.executing_eagerly(): # Keep track of updates created by stateful metrics. self.metrics_updates += metric_fn.updates return metric_results @@ -281,7 +287,7 @@ class Model(Network): targets=None, sample_weights=None, masks=None): - """Handles calling metric functions and setting model metric attributes. + """Handles calling metric functions. Arguments: outputs: List of outputs (predictions). 
@@ -301,20 +307,15 @@ class Model(Network): continue output = outputs[i] if outputs else None target = targets[i] if targets else None - output_shape = None if output is None else output.get_shape().as_list() output_mask = masks[i] if masks else None metric_results.extend( - self._handle_per_output_metrics( - self.nested_metrics[i], target, output, i, output_shape, - self.loss_functions[i], output_mask)) + self._handle_per_output_metrics(self._per_output_metrics[i], target, + output, output_mask)) metric_results.extend( self._handle_per_output_metrics( - self.nested_weighted_metrics[i], + self._per_output_weighted_metrics[i], target, output, - i, - output_shape, - self.loss_functions[i], output_mask, weights=sample_weights[i])) return metric_results @@ -506,24 +507,15 @@ class Model(Network): self.loss_weights_list = loss_weights_list # Initialize model metric attributes. - self.metrics_names = ['loss'] - self.metrics_tensors = [] - self.metrics_updates = [] - self.stateful_metric_names = [] - self.stateful_metric_functions = [] - - # Nested metrics is a list of list of metrics. - # One list per output of the model. - self.nested_metrics = training_utils.collect_metrics( - metrics, self.output_names) - self.nested_weighted_metrics = training_utils.collect_metrics( - weighted_metrics, self.output_names) + self._init_metric_attributes() # Initialization for Eager mode execution. if context.executing_eagerly(): # Prepare sample weights. self._set_sample_weight_attributes(sample_weight_mode, skip_target_weighing_indices) + # Save all metric attributes per output of the model. + self._cache_output_metric_attributes(metrics, weighted_metrics) if target_tensors is not None: raise ValueError('target_tensors are not currently supported in Eager ' @@ -534,10 +526,10 @@ class Model(Network): self.metrics_names.append(self.output_names[i] + '_loss') # Set metric attributes on model. 
- self._handle_metrics( + self._set_metric_attributes( self.outputs, skip_target_indices=skip_target_indices, - sample_weights=self.sample_weights) + ) self.targets = [] for i in range(len(self.outputs)): @@ -600,6 +592,8 @@ class Model(Network): # Prepare sample weights. self._set_sample_weight_attributes(sample_weight_mode, skip_target_weighing_indices) + # Save all metric attributes per output of the model. + self._cache_output_metric_attributes(metrics, weighted_metrics) # Compute total loss. total_loss = None @@ -634,6 +628,11 @@ class Model(Network): for loss_tensor in self.losses: total_loss += loss_tensor + # Set metric attributes on model. + self._set_metric_attributes( + self.outputs, + skip_target_indices=skip_target_indices, + ) # Invoke metric functions for all the outputs. self._handle_metrics( self.outputs, diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 8e9fab81d6..9c303f4bed 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import OrderedDict import copy import math @@ -484,29 +485,36 @@ def check_loss_and_target_compatibility(targets, loss_fns, output_shapes): 'as the output.') -def collect_metrics(metrics, output_names): - """Maps metric functions to model outputs. +def collect_per_output_metric_info(metrics, + output_names, + output_shapes, + loss_fns, + sample_weights=None): + """Maps metric names and functions to model outputs. Arguments: metrics: a list or dict of metric functions. output_names: a list of the names (strings) of model outputs. + output_shapes: a list of the shapes (strings) of model outputs. + loss_fns: a list of the loss functions corresponding to the model outputs. + sample_weights: a list of weights to be applied on the model outputs. 
Returns: - A list (one entry per model output) of lists of metric functions. + A list (one entry per model output) of dicts. For instance, if the model has 2 outputs, and for the first output we want to compute "binary_accuracy" and "binary_crossentropy", and just "binary_accuracy" for the second output, - the list would look like: - `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]` + the list would look like: `[[('acc', binary_accuracy()), + ('ce', binary_crossentropy())], [('acc', binary_accuracy())]]` Raises: TypeError: if an incorrect type is passed for the `metrics` argument. """ if not metrics: - return [[] for _ in output_names] + return [{} for _ in output_names] if isinstance(metrics, list): # we then apply all metrics to all outputs. - return [copy.copy(metrics) for _ in output_names] + nested_metrics = [copy.copy(metrics) for _ in output_names] elif isinstance(metrics, dict): nested_metrics = [] for name in output_names: @@ -514,11 +522,24 @@ def collect_metrics(metrics, output_names): if not isinstance(output_metrics, list): output_metrics = [output_metrics] nested_metrics.append(output_metrics) - return nested_metrics else: raise TypeError('Type of `metrics` argument not understood. ' 'Expected a list or dictionary, found: ' + str(metrics)) + per_output_metrics = [] + for i, metrics in enumerate(nested_metrics): + metrics_dict = OrderedDict() + for metric in metrics: + weighted = False if (sample_weights is None) else ( + sample_weights[i] is not None) + metric_name = get_metric_name(metric, weighted) + metric_fn = get_metric_function( + metric, output_shape=output_shapes[i], loss_fn=loss_fns[i]) + metrics_dict[metric_name] = metric_fn + per_output_metrics.append(metrics_dict) + + return per_output_metrics + def batch_shuffle(index_array, batch_size): """Shuffles an array in a batch-wise fashion. 
@@ -729,6 +750,33 @@ def has_tensors(ls): return tensor_util.is_tensor(ls) +def get_metric_name(metric, weighted=False): + """Returns the name corresponding to the given metric input. + + Arguments: + metric: Metric function name or reference. + weighted: Boolean indicating if the given metric is weighted. + + Returns: + The metric name. + """ + metric_name_prefix = 'weighted_' if weighted else '' + if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): + if metric in ('accuracy', 'acc'): + suffix = 'acc' + elif metric in ('crossentropy', 'ce'): + suffix = 'ce' + else: + metric_fn = metrics_module.get(metric) + # Get metric name as string + if hasattr(metric_fn, 'name'): + suffix = metric_fn.name + else: + suffix = metric_fn.__name__ + metric_name = metric_name_prefix + suffix + return metric_name + + def get_metric_function(metric, output_shape=None, loss_fn=None): """Returns the metric function corresponding to the given metric input. -- GitLab From bdca15c5e5c09e5c97f4357bd2a792da54746e94 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 16:12:07 -0700 Subject: [PATCH 0226/1357] Fixed documentation of Optimizer.minimize() for eager mode to match behavior of Optimizer.compute_gradients(). PiperOrigin-RevId: 213060585 --- tensorflow/python/training/optimizer.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 2304a461c1..699162b30c 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -385,13 +385,12 @@ class Optimizer( @compatibility(eager) When eager execution is enabled, `loss` should be a Python function that - takes elements of `var_list` as arguments and computes the value to be - minimized. If `var_list` is None, `loss` should take no arguments. 
- Minimization (and gradient computation) is done with respect to the - elements of `var_list` if not None, else with respect to any trainable - variables created during the execution of the `loss` function. - `gate_gradients`, `aggregation_method`, `colocate_gradients_with_ops` and - `grad_loss` are ignored when eager execution is enabled. + takes no arguments and computes the value to be minimized. Minimization (and + gradient computation) is done with respect to the elements of `var_list` if + not None, else with respect to any trainable variables created during the + execution of the `loss` function. `gate_gradients`, `aggregation_method`, + `colocate_gradients_with_ops` and `grad_loss` are ignored when eager + execution is enabled. @end_compatibility """ grads_and_vars = self.compute_gradients( -- GitLab From 1c2a300d483d9e5d5502cdd8131644f7647996c5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Sep 2018 16:23:48 -0700 Subject: [PATCH 0227/1357] Fix spelling in error message PiperOrigin-RevId: 213062112 --- tensorflow/python/estimator/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 0f20acefdf..90280fd25d 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -329,7 +329,7 @@ class Estimator(object): run_config.TaskType.PS): raise ValueError( 'Train has been called wrong configuration. Please use ' - 'tf.estimator.train_and_evaluate which calls propper API according ' + 'tf.estimator.train_and_evaluate which calls proper API according ' 'to given configuration. 
Current configuration: {}.'.format( self.config)) -- GitLab From 74b9d6a48286f38807bbd204d9d55467e02387ca Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Fri, 14 Sep 2018 16:25:36 -0700 Subject: [PATCH 0228/1357] [Intel MKL] Fixes for unit test failures 1) Changes in partitioned_function_ops.cc are for passing Global OpRegistry as default_registry in PartitionedFunction op This fix addresses failure in MKL layout pass when PartitionedFunction op calls graph optimization passes. The problem was that the function library definition that is used to create function graph and corresponding subgraphs after partitioning did not use global OpRegistry as the default OpRegistry used for look of operator names. Because of that, standard operators such as "Const" were not available to graph passes. 2) Changes in mkl_cpu_allocator.h are to address failure in mkl_cpu_allocator_test which was expecting that max_bytes_limits is returned via GetStats() in MKLCPUAllocator. --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 3 +++ tensorflow/core/kernels/partitioned_function_ops.cc | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index df9c3a686c..593f855ea2 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -277,6 +277,9 @@ class MklCPUAllocator : public VisitableAllocator { // max_alloc_size from large_size_allocator would be the maximum // size allocated by MklCPUAllocator. 
stats->max_alloc_size = l_stats.max_alloc_size; + + stats->bytes_limit = + std::max(s_stats.bytes_limit, l_stats.bytes_limit); } void ClearStats() override { diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index fc1c9003aa..ddb621967a 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -97,7 +97,12 @@ class PartitionedCallOp : public AsyncOpKernel { OP_REQUIRES_ASYNC(ctx, fbody != nullptr, errors::Internal("Could not find handle ", handle), done); - auto graph = tensorflow::MakeUnique(fbody->graph->flib_def()); + // We need to pass global op_registry as default_registry when creating + // graph. So that graph optimization passes can lookup all possible ops + // by name. + FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), + fbody->graph->flib_def().ToProto()); + auto graph = tensorflow::MakeUnique(func_lib_def); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -250,9 +255,10 @@ class PartitionedCallOp : public AsyncOpKernel { VLOG(3) << "Partitioned function '" << func_.name() << "', yielding " << partitions.size() << " shards."; - const FunctionLibraryDefinition* flib_def = &graph->flib_def(); + FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), + graph->flib_def().ToProto()); for (const auto& partition : partitions) { - std::unique_ptr subgraph(new Graph(flib_def)); + std::unique_ptr subgraph(new Graph(func_lib_def)); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = true; -- GitLab From e179c17b96bcb855b2056f60851a24551b4189a6 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Fri, 14 Sep 2018 16:43:25 -0700 Subject: [PATCH 0229/1357] Makes tf.Variable arguments (non-captured) DT_RESOURCE function inputs. Previously, tf.Variable arguments to a defun-d Python function were made captured inputs. 
This change makes it possible to parameterize functions on DT_RESOURCE inputs. PiperOrigin-RevId: 213064739 --- tensorflow/python/eager/function.py | 60 ++++++++++++++++++++---- tensorflow/python/eager/function_test.py | 37 +++++++++++++++ 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 962e334b27..f3fb48fd3b 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -65,7 +65,7 @@ gradients_impl._function = sys.modules[__name__] # pylint: disable=protected-ac WHITELIST_FUNCTION_ATTRIBUTE_PREFIX = "experimental_" -def _create_substitute_placeholder(value, name, dtype=None): +def _create_substitute_placeholder(value, name=None, dtype=None): """Creates a placeholder for `value` and propagates shape info to it.""" # Note: setting ops.control_dependencies(None) ensures we always put # capturing placeholders outside of any control flow context. @@ -550,7 +550,19 @@ class Function(object): self._distributed_variables[component_variable.handle] = variable def __call__(self, *args): - """Executes the wrapped function.""" + """Executes the wrapped function. + + Args: + *args: a list of Tensors or Variables. + + Returns: + The result of applying the TF function to `args`. + + Raises: + ValueError: If the current device stack does not match the device stack + under which the function was created, or if `args` contains anything + other than Tensors or Variables. 
+ """ ctx = context.context() device_functions = _get_device_functions(ctx, ops.get_default_graph()) if device_functions != self._device_functions: @@ -566,7 +578,18 @@ class Function(object): tape.variable_accessed(v) captures = self._resolve_captured_inputs() - tensor_inputs = [x for x in nest.flatten(args) if isinstance(x, ops.Tensor)] + tensor_inputs = [] + for i, arg in enumerate(nest.flatten(args)): + if isinstance(arg, resource_variable_ops.ResourceVariable): + if arg.trainable: + tape.variable_accessed(arg) + tensor_inputs.append(arg.handle) + elif isinstance(arg, ops.Tensor): + tensor_inputs.append(arg) + else: + raise ValueError("All inputs to `Function`s must be Tensors; " + "on invocation of %s, the %d-th input (%s) was not a " + "Tensor." % (self._func_graph.name, i, str(arg))) args = tensor_inputs + captures if tape.should_record(tensor_inputs) or tape.should_record(captures): @@ -817,10 +840,6 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None): func_kwds = {} # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`. - func_graph.inputs.extend( - x for x in nest.flatten(func_args) + nest.flatten(func_kwds) - if isinstance(x, ops.Tensor)) - # Variables to help check whether mutation happens in calling the function # Copy the recursive list, tuple and map structure, but not base objects func_args_before = nest.pack_sequence_as(func_args, nest.flatten(func_args)) @@ -867,6 +886,26 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None): finally: tape.pop_tape(this_tape) + # Variables in `func_args`, `func_kwds` should be explicit inputs + # to the function, not captured inputs. 
+ variables = set(this_tape.watched_variables()) + inputs = [] + for arg in nest.flatten(func_args) + nest.flatten(func_kwds): + if isinstance(arg, resource_variable_ops.ResourceVariable): + try: + resource_placeholder = func_graph.captures.pop(arg.handle) + variables.remove(arg) + except KeyError: + # This case occurs if a Variable among the inputs is not actually + # used by the function; we still add an explicit input for it + # because the user should presumably pass the Variable as an input + # to the corresponding graph function. + resource_placeholder = _create_substitute_placeholder(arg.handle) + inputs.append(resource_placeholder) + elif isinstance(arg, ops.Tensor): + inputs.append(arg) + func_graph.inputs = inputs + list(func_graph.captures.values()) + func_graph.structured_outputs = func_outputs # Returning a closed-over tensor does not trigger convert_to_tensor. func_graph.outputs.extend( @@ -878,7 +917,7 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None): # Instead of storing non-distributed component variables, we # store their distributed containers so we can retrieve the correct # component variables at call-time. - variables = list(this_tape.watched_variables()) + variables = list(variables) strategy = distribution_strategy_context.get_distribution_strategy() for i, variable in enumerate(variables): # If variable is not distributed value_container returns itself. 
@@ -1201,7 +1240,10 @@ class PolymorphicFunction(object): self._variables.extend( [v for v in graph_function.variables if v not in self._variables]) self._function_cache[cache_key] = graph_function - return graph_function, (args, kwds) + return graph_function, [ + t for t in nest.flatten((args, kwds)) + if isinstance(t, (ops.Tensor, resource_variable_ops.ResourceVariable)) + ] def register(func, *args, **kwargs): diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index a0abefe666..c168b6060c 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1685,6 +1685,43 @@ class FunctionTest(test.TestCase): # pylint: disable=protected-access self.assertEqual(len(graph._functions), 1) + def testCallingFunctionWithDifferentVariables(self): + + @function.defun + def foo(v): + v.assign_add(1.0) + return v.read_value() + + v = resource_variable_ops.ResourceVariable(0.0) + graph_function = foo.get_concrete_function(v) + self.assertEqual(len(graph_function.inputs), 1) + self.assertEqual(len(graph_function.captured_inputs), 0) + + self.assertEqual(float(graph_function(v)), 1.0) + self.assertEqual(float(graph_function(v)), 2.0) + + w = resource_variable_ops.ResourceVariable(0.0) + + @function.defun + def bar(v): + del v + return constant_op.constant(1.0) + + graph_function = bar.get_concrete_function(v) + self.assertEqual(float(graph_function(v)), 1.0) + self.assertEqual(float(graph_function(w)), 1.0) + + def testCallingFunctionWithNonTensorsFails(self): + + @function.defun + def foo(x): + return x + + graph_function = foo.get_concrete_function(constant_op.constant(1.0)) + with self.assertRaisesRegexp(ValueError, 'All inputs to `Function`s must ' + 'be Tensors;.*'): + graph_function('Not a Tensor.') + @test_util.with_c_shapes class AutomaticControlDependenciesTest(test.TestCase): -- GitLab From 2e44b4681a16f4127502b0330228d7d4b33f8ee5 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: 
Fri, 14 Sep 2018 17:03:41 -0700 Subject: [PATCH 0230/1357] Switch to Eigen::Index in Tensorflow kernels. Mixing index type doesn't work well with latest Eigen. PiperOrigin-RevId: 213067224 --- .../contrib/tensor_forest/kernels/tree_utils.cc | 12 ++++++------ tensorflow/core/kernels/bias_op.cc | 13 +++++++------ tensorflow/core/kernels/unravel_index_op.cc | 10 ++++++---- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc index cefcc96051..dd5d028314 100644 --- a/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc +++ b/tensorflow/contrib/tensor_forest/kernels/tree_utils.cc @@ -67,11 +67,11 @@ float ClassificationSplitScore( const Eigen::Tensor& splits, const Eigen::Tensor& rights, int32 num_classes, int i) { - Eigen::array offsets; + Eigen::array offsets; // Class counts are stored with the total in [0], so the length of each // count vector is num_classes + 1. offsets[0] = i * (num_classes + 1) + 1; - Eigen::array extents; + Eigen::array extents; extents[0] = num_classes; return WeightedGiniImpurity(splits.slice(offsets, extents)) + WeightedGiniImpurity(rights.slice(offsets, extents)); @@ -97,7 +97,7 @@ void GetTwoBestClassification(const Tensor& total_counts, // arguments to ClassificationSplitScore. 
const Eigen::Tensor splits = split_counts.Slice(accumulator, accumulator + 1).unaligned_flat(); - Eigen::array bcast; + Eigen::array bcast; bcast[0] = num_splits; const Eigen::Tensor rights = tc.broadcast(bcast) - splits; @@ -130,8 +130,8 @@ float RegressionSplitScore( const Eigen::Tensor& right_sums, const Eigen::Tensor& right_squares, int32 accumulator, int32 num_regression_dims, int i) { - Eigen::array offsets = {i * num_regression_dims + 1}; - Eigen::array extents = {num_regression_dims - 1}; + Eigen::array offsets = {i * num_regression_dims + 1}; + Eigen::array extents = {num_regression_dims - 1}; float left_count = splits_count_accessor(accumulator, i, 0); float right_count = totals_count_accessor(accumulator, 0) - left_count; @@ -178,7 +178,7 @@ void GetTwoBestRegression(const Tensor& total_sums, const Tensor& total_squares, const auto splits_count_accessor = split_sums.tensor(); const auto totals_count_accessor = total_sums.tensor(); - Eigen::array bcast; + Eigen::array bcast; bcast[0] = num_splits; const auto right_sums = tc_sum.broadcast(bcast) - splits_sum; const auto right_squares = tc_square.broadcast(bcast) - splits_square; diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc index 7b28c8e91f..e15ea82e7d 100644 --- a/tensorflow/core/kernels/bias_op.cc +++ b/tensorflow/core/kernels/bias_op.cc @@ -134,8 +134,8 @@ class BiasOp : public BinaryOp { if (data_format_ == FORMAT_NCHW) { int32 batch, height, width, channel; GetBiasValueDims(input, data_format_, &batch, &height, &width, &channel); - Eigen::DSizes four_dims(1, channel, 1, 1); - Eigen::DSizes broad_cast_dims(batch, 1, height, width); + Eigen::DSizes four_dims(1, channel, 1, 1); + Eigen::DSizes broad_cast_dims(batch, 1, height, width); const Device& d = context->eigen_device(); output->tensor().device(d) = input.tensor() + @@ -247,14 +247,14 @@ class BiasGradOp : public OpKernel { OP_REQUIRES(context, output_backprop.dims() == 4, errors::InvalidArgument( "NCHW format 
supports only 4D input/output tensor.")); - Eigen::DSizes four_dims(batch, channel, height, width); + Eigen::DSizes four_dims(batch, channel, height, width); #ifdef EIGEN_HAS_INDEX_LIST using idx0 = Eigen::type2index<0>; using idx2 = Eigen::type2index<2>; using idx3 = Eigen::type2index<3>; Eigen::IndexList reduction_axes; #else - Eigen::array reduction_axes = {0, 2, 3}; + Eigen::array reduction_axes = {0, 2, 3}; #endif output->template flat().device(context->eigen_device()) = output_backprop.flat() @@ -263,11 +263,12 @@ class BiasGradOp : public OpKernel { .sum(reduction_axes) .template cast(); // End of code by intel_tf. } else { - Eigen::DSizes two_dims(batch * height * width, channel); + Eigen::DSizes two_dims(batch * height * width, + channel); #ifdef EIGEN_HAS_INDEX_LIST Eigen::IndexList > reduction_axis; #else - Eigen::array reduction_axis = {0}; + Eigen::array reduction_axis = {0}; #endif output->template flat().device(context->eigen_device()) = output_backprop.flat() diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc index 62e814ff77..8d839ba85a 100644 --- a/tensorflow/core/kernels/unravel_index_op.cc +++ b/tensorflow/core/kernels/unravel_index_op.cc @@ -97,10 +97,12 @@ class UnravelIndexOp : public OpKernel { auto output = output_tensor->matrix(); - Eigen::array reshape{{dims_tensor.NumElements(), 1}}; - Eigen::array bcast({1, indices_tensor.NumElements()}); - Eigen::array indices_reshape{{1, indices_tensor.NumElements()}}; - Eigen::array indices_bcast({dims_tensor.NumElements(), 1}); + Eigen::array reshape{{dims_tensor.NumElements(), 1}}; + Eigen::array bcast({1, indices_tensor.NumElements()}); + Eigen::array indices_reshape{ + {1, indices_tensor.NumElements()}}; + Eigen::array indices_bcast( + {dims_tensor.NumElements(), 1}); output = indices_tensor.vec() .reshape(indices_reshape) -- GitLab From 0d4cb43a540f08cb73c00fac662c961e4154ac32 Mon Sep 17 00:00:00 2001 From: James Qin Date: Fri, 14 Sep 2018 
17:29:46 -0700 Subject: [PATCH 0231/1357] Revert PR #21997: Fixes the formatting issue pointed out at #21762 It breaks. should be s/input_shape/inputs_shape. PiperOrigin-RevId: 213070141 --- tensorflow/python/ops/rnn_cell_impl.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index 3e19183ff5..43cca1a498 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -428,7 +428,7 @@ class BasicRNNCell(LayerRNNCell): def build(self, inputs_shape): if inputs_shape[-1] is None: raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" - % str(input_shape)) + % str(inputs_shape)) input_depth = inputs_shape[-1] self._kernel = self.add_variable( @@ -525,7 +525,7 @@ class GRUCell(LayerRNNCell): def build(self, inputs_shape): if inputs_shape[-1] is None: raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" - % str(input_shape)) + % str(inputs_shape)) input_depth = inputs_shape[-1] self._gate_kernel = self.add_variable( @@ -705,7 +705,7 @@ class BasicLSTMCell(LayerRNNCell): def build(self, inputs_shape): if inputs_shape[-1] is None: raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" - % str(input_shape)) + % str(inputs_shape)) input_depth = inputs_shape[-1] h_depth = self._num_units @@ -908,7 +908,7 @@ class LSTMCell(LayerRNNCell): def build(self, inputs_shape): if inputs_shape[-1] is None: raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" - % str(input_shape)) + % str(inputs_shape)) input_depth = inputs_shape[-1] h_depth = self._num_units if self._num_proj is None else self._num_proj -- GitLab From 08589aa0c4447b21dd73183cf5cfafff326324dc Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 14 Sep 2018 18:22:52 -0700 Subject: [PATCH 0232/1357] Make accessed variable ordering deterministic again when constructing defuns PiperOrigin-RevId: 213074939 --- 
tensorflow/python/eager/function.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index f3fb48fd3b..e2874e25b6 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -888,13 +888,14 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None): # Variables in `func_args`, `func_kwds` should be explicit inputs # to the function, not captured inputs. - variables = set(this_tape.watched_variables()) + tape_variables = this_tape.watched_variables() + arg_variables = set() inputs = [] for arg in nest.flatten(func_args) + nest.flatten(func_kwds): if isinstance(arg, resource_variable_ops.ResourceVariable): try: resource_placeholder = func_graph.captures.pop(arg.handle) - variables.remove(arg) + arg_variables.add(arg) except KeyError: # This case occurs if a Variable among the inputs is not actually # used by the function; we still add an explicit input for it @@ -904,6 +905,7 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None): inputs.append(resource_placeholder) elif isinstance(arg, ops.Tensor): inputs.append(arg) + variables = [v for v in tape_variables if v not in arg_variables] func_graph.inputs = inputs + list(func_graph.captures.values()) func_graph.structured_outputs = func_outputs @@ -917,7 +919,6 @@ def func_graph_from_py_func(name, python_func, args, kwds, signature=None): # Instead of storing non-distributed component variables, we # store their distributed containers so we can retrieve the correct # component variables at call-time. - variables = list(variables) strategy = distribution_strategy_context.get_distribution_strategy() for i, variable in enumerate(variables): # If variable is not distributed value_container returns itself. 
-- GitLab From 33f57bd1311df97a25cd70784dfaafc8e44d07c4 Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Sat, 15 Sep 2018 12:46:58 +0900 Subject: [PATCH 0233/1357] clang-format --- .../core/kernels/extract_volume_patches_op.cc | 50 ++++++++++--------- .../core/kernels/extract_volume_patches_op.h | 12 ++--- .../extract_volume_patches_op_gpu.cu.cc | 2 +- tensorflow/core/ops/array_ops.cc | 16 +++--- 4 files changed, 44 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc index 0f1d566c75..52cd078a35 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.cc +++ b/tensorflow/core/kernels/extract_volume_patches_op.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -/* -See extract_image_patches_op* files and docs for extract_image_patches in +/* +See extract_image_patches_op* files and docs for extract_image_patches in ../ops/image_ops.cc. Rates are not supported as of now, but the comments hint how to edit the code @@ -60,7 +60,7 @@ class ExtractVolumePatchesOp : public UnaryOp { : UnaryOp(context) { ParseAttributeVec5(context, "ksizes", &ksizes_); ParseAttributeVec5(context, "strides", &strides_); - //ParseAttributeVec5(context, "rates", &rates_); + // ParseAttributeVec5(context, "rates", &rates_); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -88,18 +88,20 @@ class ExtractVolumePatchesOp : public UnaryOp { /* // TODO(hsgkim): enable rates - // Rates are disabled as of now due to Eigen's definitions of extract_volume_patch - // functions; none of them accept rates as its argument and rates are fixed to - // (1, 1, 1, 1, 1). A workaround has to be found for this. 
+ // Rates are disabled as of now due to Eigen's definitions of + // `extract_volume_patch` functions; none of them accept rates + // as its argument and rates are fixed to (1, 1, 1, 1, 1). A + // workaround has to be found for this. // In order to enable rates, uncomment the following lines and use - // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize - // calls. + // ksize_*_eff instead of ksize_* for the second argument of + // GetWindowedOutputSize calls. const int rate_planes = rates_[1]; const int rate_rows = rates_[2]; const int rate_cols = rates_[3]; - const int ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1); + const int ksize_planes_eff = ksize_planes + + (ksize_planes - 1) * (rate_planes - 1); const int ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1); const int ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1); */ @@ -116,8 +118,9 @@ class ExtractVolumePatchesOp : public UnaryOp { GetWindowedOutputSize(in_cols, ksize_cols, stride_cols, padding_, &out_cols, &pad_cols)); - const std::vector out_sizes = {batch, out_planes, out_rows, out_cols, - ksize_planes * ksize_rows * ksize_cols * depth}; + const std::vector out_sizes = { + batch, out_planes, out_rows, out_cols, + ksize_planes * ksize_rows * ksize_cols * depth}; TensorShape out_shape(out_sizes); Tensor* output = nullptr; @@ -129,9 +132,8 @@ class ExtractVolumePatchesOp : public UnaryOp { } functor::ExtractVolumePatchesForward()( - context->eigen_device(), input.tensor(), - ksize_planes, ksize_rows, ksize_cols, - stride_planes, stride_rows, stride_cols, + context->eigen_device(), input.tensor(), ksize_planes, + ksize_rows, ksize_cols, stride_planes, stride_rows, stride_cols, /* rate_planes, rate_rows, rate_cols, */ BrainPadding2EigenPadding(padding_), output->tensor()); } @@ -161,16 +163,18 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER); // Forward declarations of the functor specializations for GPU. 
namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void ExtractVolumePatchesForward::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor input, \ - int patch_planes, int patch_rows, int patch_cols, \ - int stride_planes, int stride_rows, int stride_cols, \ - /* int rate_planes, int rate_rows, int rate_cols, */ \ - const Eigen::PaddingType& padding, \ - typename TTypes::Tensor output); \ +// clang-format off +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ExtractVolumePatchesForward::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor input, \ + int patch_planes, int patch_rows, int patch_cols, \ + int stride_planes, int stride_rows, int stride_cols, \ + /* int rate_planes, int rate_rows, int rate_cols, */ \ + const Eigen::PaddingType& padding, \ + typename TTypes::Tensor output); \ extern template struct ExtractVolumePatchesForward; +// clang-format on TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); diff --git a/tensorflow/core/kernels/extract_volume_patches_op.h b/tensorflow/core/kernels/extract_volume_patches_op.h index e2418334ac..7e0502b770 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.h +++ b/tensorflow/core/kernels/extract_volume_patches_op.h @@ -16,10 +16,10 @@ limitations under the License. 
#ifndef TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ #define TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/eigen_volume_patch.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { namespace functor { @@ -27,7 +27,7 @@ namespace functor { template struct ExtractVolumePatchesForward { void operator()(const Device& d, typename TTypes::ConstTensor input, - int patch_planes, int patch_rows, int patch_cols, + int patch_planes, int patch_rows, int patch_cols, int stride_planes, int stride_rows, int stride_cols, /* int rate_planes, int rate_rows, int rate_cols, */ const Eigen::PaddingType& padding, @@ -38,15 +38,15 @@ struct ExtractVolumePatchesForward { output_32bit.device(d) = To32Bit(input) .extract_volume_patches(patch_cols, patch_rows, patch_planes, - stride_cols, stride_rows, stride_planes, - padding) + stride_cols, stride_rows, stride_planes, + padding) .reshape(output_32bit.dimensions()); } else { output.device(d) = input .extract_volume_patches(patch_cols, patch_rows, patch_planes, - stride_cols, stride_rows, stride_planes, - padding) + stride_cols, stride_rows, stride_planes, + padding) .reshape(output.dimensions()); } } diff --git a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc index 08b3386c13..c636493602 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc +++ b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc @@ -17,8 +17,8 @@ limitations under the License. 
#define EIGEN_USE_GPU -#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/extract_volume_patches_op.h" +#include "tensorflow/core/framework/register_types.h" namespace tensorflow { diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 44908fe875..7ce4a39aca 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2609,7 +2609,8 @@ REGISTER_OP("ExtractVolumePatches") int32 rate_rows = rates[2]; int32 rate_cols = rates[3]; - int32 ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1); + int32 ksize_planes_eff = ksize_planes + + (ksize_planes - 1) * (rate_planes - 1); int32 ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1); int32 ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1); */ @@ -2619,10 +2620,12 @@ REGISTER_OP("ExtractVolumePatches") DimensionHandle in_rows_dim = c->Dim(input_shape, 2); DimensionHandle in_cols_dim = c->Dim(input_shape, 3); DimensionHandle output_depth_dim; - TF_RETURN_IF_ERROR(c->Multiply( - c->Dim(input_shape, 4), ksize_planes * ksize_rows * ksize_cols, &output_depth_dim)); + TF_RETURN_IF_ERROR(c->Multiply(c->Dim(input_shape, 4), + ksize_planes * ksize_rows * ksize_cols, + &output_depth_dim)); - if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) || !c->ValueKnown(in_cols_dim)) { + if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) || + !c->ValueKnown(in_cols_dim)) { ShapeHandle output_shape = c->MakeShape({batch_size_dim, InferenceContext::kUnknownDim, InferenceContext::kUnknownDim, output_depth_dim}); @@ -2647,8 +2650,9 @@ REGISTER_OP("ExtractVolumePatches") TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( in_cols, ksize_cols, stride_cols, padding, &output_cols, &padding_before, &padding_after)); - ShapeHandle output_shape = c->MakeShape( - {batch_size_dim, output_planes, output_rows, output_cols, output_depth_dim}); + ShapeHandle output_shape = + 
c->MakeShape({batch_size_dim, output_planes, output_rows, output_cols, + output_depth_dim}); c->set_output(0, output_shape); return Status::OK(); }); -- GitLab From 72359f9cfa10a08cecc3a179999a1b8ab835a818 Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Sat, 15 Sep 2018 16:02:22 +0800 Subject: [PATCH 0234/1357] fix bug of lacking axis when using array.ops.concat in unwrap_and_concat function --- tensorflow/python/estimator/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 90280fd25d..ff2baa0465 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -1653,7 +1653,7 @@ def _combine_distributed_scaffold(grouped_scaffold, distribution): def _unwrap_and_concat(value): value = nest.flatten(distribution.unwrap(value)) if len(value) != 1: - return array_ops.concat(value) + return array_ops.concat(value, 0) return value[0] ready_op = distribution.call_for_each_tower( -- GitLab From e517e2cf49a23d8561bcc5fcacbbb6674064b0e9 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 15 Sep 2018 02:01:56 -0700 Subject: [PATCH 0235/1357] compat: Update forward compatibility horizon to 2018-09-15 PiperOrigin-RevId: 213100589 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 8a100fe975..db850509ad 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 14) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 15) @tf_export("compat.forward_compatible") -- GitLab From eab14a9303f6268d97fa3b901cc09a71c86bba63 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Sat, 15 Sep 2018 06:04:12 -0700 Subject: [PATCH 0236/1357] [TPU] Deprecate the computation_shape attribute to the TpuReplicate op in lieu of a new num_cores_per_replica. PiperOrigin-RevId: 213111326 --- tensorflow/contrib/tpu/ops/replication_ops.cc | 11 ++++++----- tensorflow/contrib/tpu/python/tpu/tpu.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/contrib/tpu/ops/replication_ops.cc index 15a2bb17a9..285e11d92d 100644 --- a/tensorflow/contrib/tpu/ops/replication_ops.cc +++ b/tensorflow/contrib/tpu/ops/replication_ops.cc @@ -24,9 +24,11 @@ using shape_inference::ShapeHandle; REGISTER_OP("TPUReplicateMetadata") .Attr("num_replicas: int >= 0") + .Attr("num_cores_per_replica: int = 1") .Attr("topology: string = \"\"") .Attr("use_tpu: bool = true") .Attr("device_assignment: list(int) = []") + // Deprecated. Use num_cores_per_replica instead. 
.Attr("computation_shape: list(int) = []") .Attr("host_compute_core: list(string) = []") .SetShapeFn(shape_inference::UnknownShape); @@ -93,11 +95,11 @@ REGISTER_OP("TPUCompilationResult") REGISTER_OP("TPUReplicate") .Attr("computation: func") .Attr("num_replicas: int >= 1") + .Attr("num_cores_per_replica: int = 1") .Attr("topology: string = \"\"") .Attr("use_tpu: bool = true") .Attr("device_assignment: list(int) = []") .Attr("host_compute_core: list(string) = []") - .Attr("computation_shape: list(int) = []") .Attr("Tinputs: list(type) >= 0") .Attr("Tbroadcast_inputs: list(type) >= 0") .Attr("NumVariables: int >= 0") @@ -114,16 +116,15 @@ Runs replicated computations on a distributed TPU system. computation: a function containing the computation to run. num_replicas: the number of replicas of the computation to run. +num_cores_per_replica: the number of logical cores in each replica. topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU topology. use_tpu: a bool indicating if this computation will run on TPU or CPU/GPU. Currently, only supports a default placement (computation is placed on GPU if one is available, and on CPU if not). -computation_shape: a [mesh_dimension] array describing the shape of each - computation replica in numbers of cores in the TPU mesh. device_assignment: a flattened array with shape - [replica] + computation_shape + [mesh_dimension] that maps the coordinates of - logical cores in each replica of a computation to physical coordinates in + [replica, num_cores_per_replica, mesh_dimension] that maps the coordinates + of logical cores in each replica of a computation to physical coordinates in the TPU topology. Tinputs: the types of the arguments to 'computation'. inputs: the inputs to 'computation', flattened, in replica-major order. 
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index 0f9f7cd91b..815a087a24 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -26,6 +26,7 @@ from tensorflow.contrib.tpu.python.ops import tpu_ops from tensorflow.contrib.tpu.python.tpu import tpu_function from tensorflow.core.framework import attr_value_pb2 +from tensorflow.python.compat import compat as api_compat from tensorflow.python.framework import device as pydev from tensorflow.python.framework import errors from tensorflow.python.framework import ops @@ -558,10 +559,16 @@ def split_compile_and_replicate(computation, "topology": device_assignment.topology.serialized(), "device_assignment": - device_assignment.core_assignment.flatten().tolist(), - "computation_shape": - device_assignment.computation_shape.tolist() + device_assignment.core_assignment.flatten().tolist() } + # TODO(phawkins): remove this case after the forward compatibility window + # expires on 2018-10-6. + if api_compat.forward_compatible(2018, 10, 6): + metadata_kwargs["num_cores_per_replica"] = ( + device_assignment.num_cores_per_replica) + else: + metadata_kwargs["computation_shape"] = ( + device_assignment.computation_shape.tolist()) if ((not isinstance(inputs, list)) or any(not isinstance(inp, (list, tuple)) for inp in inputs)): -- GitLab From aa2094fc9dc6e67d6e440231828de05a6da3cf78 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sun, 16 Sep 2018 02:11:30 -0700 Subject: [PATCH 0237/1357] compat: Update forward compatibility horizon to 2018-09-16 PiperOrigin-RevId: 213161736 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index db850509ad..c246a98237 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 15) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 16) @tf_export("compat.forward_compatible") -- GitLab From 92c31bb620b0f8dd6590380dc6a5674f591ce1cb Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Sun, 16 Sep 2018 12:01:52 -0700 Subject: [PATCH 0238/1357] Introduce gmock matchers for TensorFlow nodes I need these to write readable unit tests for TF graph transformations. All of my use cases will live inside tensorflow/compiler so putting it in tensorflow/compiler/jit for now; but we can move these out if other users are interested. In the future we may want to auto-generate type safe versions of these from the op registrations like we generate C++ wrappers today. 
PiperOrigin-RevId: 213186810 --- tensorflow/compiler/jit/BUILD | 29 ++ tensorflow/compiler/jit/node_matchers.cc | 458 ++++++++++++++++++ tensorflow/compiler/jit/node_matchers.h | 197 ++++++++ tensorflow/compiler/jit/node_matchers_test.cc | 179 +++++++ 4 files changed, 863 insertions(+) create mode 100644 tensorflow/compiler/jit/node_matchers.cc create mode 100644 tensorflow/compiler/jit/node_matchers.h create mode 100644 tensorflow/compiler/jit/node_matchers_test.cc diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index f4e1bc5e83..1001c57f3d 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -599,6 +599,35 @@ tf_cuda_cc_test( ], ) +cc_library( + name = "node_matchers", + testonly = True, + srcs = ["node_matchers.cc"], + hdrs = ["node_matchers.h"], + deps = [ + "//tensorflow/cc:ops", + "//tensorflow/compiler/xla:test", + "//tensorflow/core:graph", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + +tf_cc_test( + name = "node_matchers_test", + srcs = ["node_matchers_test.cc"], + deps = [ + ":node_matchers", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:ops", + "//tensorflow/core:ops", + "//tensorflow/core:test_main", + ], +) + # This target can be used by XLA device plugins to prevent circular dependencies, and provides access to all of the required headers for building a device library. cc_header_only_library( name = "xla_jit_headers_lib", diff --git a/tensorflow/compiler/jit/node_matchers.cc b/tensorflow/compiler/jit/node_matchers.cc new file mode 100644 index 0000000000..d8ace628e6 --- /dev/null +++ b/tensorflow/compiler/jit/node_matchers.cc @@ -0,0 +1,458 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/node_matchers.h" + +#include +#include "absl/algorithm/container.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "tensorflow/core/framework/tensor.pb.h" + +namespace tensorflow { +namespace testing { +namespace matchers { +namespace { + +using impl::NodeMatcherProperties; + +string IndentAllButFirstLine(absl::string_view text) { + std::vector lines = absl::StrSplit(text, '\n'); + for (int i = 1; i < lines.size(); i++) { + lines[i].insert(0, " "); + } + return absl::StrJoin(lines, "\n"); +} + +template +bool CompareTensor(const Tensor& actual, const Tensor& expected, + ::testing::MatchResultListener* listener) { + if (actual.NumElements() != expected.NumElements()) { + if (listener->IsInterested()) { + *listener << "\nwas looking for tensor with " << expected.NumElements() + << " elements, found tensor with " << actual.NumElements() + << " elements"; + return false; + } + } + + for (int64 i = 0, e = actual.NumElements(); i < e; i++) { + if (actual.flat()(i) != expected.flat()(i)) { + *listener << "\nmismatch in constant tensor at index " << i + << " expected = " << expected.flat()(i) + << " actual = " << actual.flat()(i); + return false; + } + } + + return true; +} + +bool MatchAndExplainTensor(const Tensor& tensor, const Tensor& expected_tensor, + 
::testing::MatchResultListener* listener) { + if (tensor.dtype() != expected_tensor.dtype()) { + if (listener->IsInterested()) { + *listener << "\nexpected tensor of type " + << DataType_Name(expected_tensor.dtype()) + << " but found one of type " << DataType_Name(tensor.dtype()); + return false; + } + } + + switch (tensor.dtype()) { + case DT_FLOAT: + return CompareTensor(tensor, expected_tensor, listener); + case DT_DOUBLE: + return CompareTensor(tensor, expected_tensor, listener); + case DT_INT8: + return CompareTensor(tensor, expected_tensor, listener); + case DT_INT16: + return CompareTensor(tensor, expected_tensor, listener); + case DT_INT32: + return CompareTensor(tensor, expected_tensor, listener); + case DT_INT64: + return CompareTensor(tensor, expected_tensor, listener); + case DT_UINT8: + return CompareTensor(tensor, expected_tensor, listener); + case DT_UINT16: + return CompareTensor(tensor, expected_tensor, listener); + case DT_UINT32: + return CompareTensor(tensor, expected_tensor, listener); + case DT_UINT64: + return CompareTensor(tensor, expected_tensor, listener); + default: + LOG(FATAL) << "Unsupported dtype " // Crash ok: testonly. 
+ << DataType_Name(tensor.dtype()); + } +} + +using Input = std::pair; + +struct NodeMatcher : public ::testing::MatcherInterface { + bool MatchAndExplain( + const Node* node, + ::testing::MatchResultListener* listener) const override { + if (op && node->type_string() != *op) { + if (listener->IsInterested()) { + *listener << "\nexpected op " << *op << " but found " + << node->type_string(); + } + return false; + } + + if (assigned_device && node->assigned_device_name() != *assigned_device) { + if (listener->IsInterested()) { + *listener << "\nexpected assigned_device " << *assigned_device + << " but found \"" << node->assigned_device_name() << "\""; + } + return false; + } + + if (name && node->name() != *name) { + if (listener->IsInterested()) { + *listener << "\nexpected name " << *name << " but found " + << node->name(); + } + return false; + } + + if (constant_value) { + const TensorProto* proto = nullptr; + if (!GetNodeAttr(node->def(), "value", &proto).ok()) { + if (listener->IsInterested()) { + *listener << "\ncould not find \"value\" attribute in node"; + } + return false; + } + + Tensor tensor(proto->dtype()); + if (!tensor.FromProto(*proto)) { + if (listener->IsInterested()) { + *listener << "\ncould not convert TensorProto in \"value\" attribute " + "to Tensor"; + } + return false; + } + + if (!MatchAndExplainTensor(/*tensor=*/tensor, + /*expected_tensor=*/*constant_value, + listener)) { + return false; + } + } + + if (input_matchers) { + if (input_matchers->size() != node->num_inputs()) { + if (listener->IsInterested()) { + *listener << "\nexpected " << input_matchers->size() + << " inputs but node has " << node->num_inputs(); + } + return false; + } + + for (int input_idx = 0, e = input_matchers->size(); input_idx < e; + input_idx++) { + if (!MatchAndExplainInput(node, input_idx, listener)) { + return false; + } + } + } + + std::vector control_deps; + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + 
control_deps.push_back(e->src()); + } + } + + ::testing::StringMatchResultListener inner_listener; + if (control_dep_set && + !control_dep_set->MatchAndExplain(control_deps, &inner_listener)) { + if (listener->IsInterested()) { + string explanation = inner_listener.str(); + if (!explanation.empty()) { + explanation = absl::StrCat(", ", explanation, ","); + } + *listener << "ctrl_deps" << explanation << " does not match expected: "; + control_dep_set->DescribeTo(listener->stream()); + } + return false; + } + return true; + } + + void DescribeTo(::std::ostream* os) const override { + std::vector predicates; + + if (name) { + predicates.push_back(absl::StrCat("name: ", *name)); + } + + if (op) { + predicates.push_back(absl::StrCat("op: ", *op)); + } + + if (assigned_device) { + predicates.push_back(absl::StrCat("assigned device: ", *assigned_device)); + } + + bool printed_something = !predicates.empty(); + + *os << absl::StrJoin(predicates, ", "); + + if (constant_value) { + printed_something = true; + *os << "constant value: " << constant_value->DebugString(); + } + + if (input_matchers) { + if (!input_matchers->empty()) { + printed_something = true; + *os << " with " << (input_matchers->size() == 1 ? "only " : "") + << "input" << (input_matchers->size() == 1 ? 
"" : "s") << " "; + } + + if (input_matchers->size() == 1) { + ::std::stringstream ss; + input_matchers->front().DescribeTo(&ss); + printed_something = true; + *os << "matching " << ss.str(); + } else { + int edge_idx = 0; + for (const ::testing::Matcher& matcher : (*input_matchers)) { + *os << "\n [" << edge_idx << "] matching ("; + ::std::stringstream ss; + matcher.DescribeTo(&ss); + printed_something = true; + *os << IndentAllButFirstLine(ss.str()); + *os << ")"; + edge_idx++; + } + } + } + + if (control_dep_set) { + printed_something = true; + *os << " and control deps "; + control_dep_set->DescribeTo(os); + } + + if (!printed_something) { + *os << "is any node"; + } + } + + bool MatchAndExplainInput(const Node* node, int input_idx, + ::testing::MatchResultListener* listener) const { + const Edge* edge; + if (!node->input_edge(input_idx, &edge).ok()) { + if (listener->IsInterested()) { + *listener << "\ncould not find incoming edge for input " << input_idx; + } + return false; + } + + ::testing::StringMatchResultListener inner_listener; + Input input = {edge->src(), edge->src_output()}; + if ((*input_matchers)[input_idx].MatchAndExplain(input, &inner_listener)) { + return true; + } + + if (listener->IsInterested()) { + *listener << "\ninput " << input_idx << " does not match expected:\n"; + (*input_matchers)[input_idx].DescribeTo(listener->stream()); + string explanation = inner_listener.str(); + if (!explanation.empty()) { + *listener << ", " << explanation; + } + } + return false; + } + + absl::optional op; + absl::optional name; + absl::optional assigned_device; + absl::optional constant_value; + absl::optional>> input_matchers; + absl::optional<::testing::Matcher>> + control_dep_set; +}; + +// Matches a dst and dst_output on an input edge. Today we only use this with +// dst_output=0 but we will eventually need to support multi-output operations. 
+class InputMatcher : public ::testing::MatcherInterface { + public: + InputMatcher(::testing::Matcher src_matcher, int src_output) + : src_matcher_(std::move(src_matcher)), src_output_(src_output) {} + + bool MatchAndExplain( + Input input, ::testing::MatchResultListener* listener) const override { + ::testing::StringMatchResultListener inner_listener; + if (!src_matcher_.MatchAndExplain(input.first, &inner_listener)) { + if (listener->IsInterested()) { + *listener << "\nsource does not match expected "; + src_matcher_.DescribeTo(listener->stream()); + string explanation = inner_listener.str(); + if (!explanation.empty()) { + *listener << "\n\t" << explanation; + } + } + return false; + } + if (input.second != src_output_) { + if (listener->IsInterested()) { + *listener << "\nexpected output slot to be " << src_output_ + << " but found " << input.second; + } + return false; + } + + return true; + } + + void DescribeTo(::std::ostream* os) const override { + if (src_output_) { + *os << "output slot: " << src_output_ << ", source: ("; + } + + src_matcher_.DescribeTo(os); + + if (src_output_) { + *os << ")"; + } + } + + private: + ::testing::Matcher src_matcher_; + int src_output_; +}; + +std::vector<::testing::Matcher> NodeMatchersToInputMatchers( + absl::Span> node_matchers) { + std::vector<::testing::Matcher> result; + absl::c_transform(node_matchers, std::back_inserter(result), + [](::testing::Matcher n) { + return ::testing::MakeMatcher(new InputMatcher(n, 0)); + }); + return result; +} +} // namespace + +::testing::Matcher impl::NodeWith( + absl::Span props) { + NodeMatcher* matcher = new NodeMatcher(); + for (const NodeMatcherProperties& prop : props) { + if (prop.name()) { + DCHECK(!matcher->name); + matcher->name = prop.name(); + } + + if (prop.op()) { + DCHECK(!matcher->op); + matcher->op = prop.op(); + } + + if (prop.constant_value()) { + DCHECK(!matcher->constant_value); + matcher->constant_value = prop.constant_value(); + } + + if (prop.assigned_device()) 
{ + DCHECK(!matcher->assigned_device); + matcher->assigned_device = prop.assigned_device(); + } + + if (prop.input_nodes()) { + DCHECK(!matcher->input_matchers); + matcher->input_matchers = + NodeMatchersToInputMatchers(*prop.input_nodes()); + } + + if (prop.control_deps()) { + DCHECK(!matcher->control_dep_set); + matcher->control_dep_set = + ::testing::UnorderedElementsAreArray(*prop.control_deps()); + } + } + + return ::testing::MakeMatcher(matcher); +} + +impl::NodeMatcherProperties Name(string name) { + impl::NodeMatcherProperties props; + props.set_name(std::move(name)); + return props; +} + +// Matches a node with op `op`. +impl::NodeMatcherProperties Op(string op) { + impl::NodeMatcherProperties props; + props.set_op(std::move(op)); + return props; +} + +// Matches a node with assigned device `assigned_device`. +impl::NodeMatcherProperties AssignedDevice(string assigned_device) { + impl::NodeMatcherProperties props; + props.set_assigned_device(std::move(assigned_device)); + return props; +} + +impl::NodeMatcherProperties impl::Inputs( + absl::Span> inputs) { + std::vector<::testing::Matcher> inputs_vector; + absl::c_copy(inputs, std::back_inserter(inputs_vector)); + + impl::NodeMatcherProperties props; + props.set_input_nodes(std::move(inputs_vector)); + return props; +} + +impl::NodeMatcherProperties impl::CtrlDeps( + absl::Span> control_deps) { + std::vector<::testing::Matcher> control_deps_vector; + absl::c_copy(control_deps, std::back_inserter(control_deps_vector)); + + impl::NodeMatcherProperties props; + props.set_control_deps(std::move(control_deps_vector)); + return props; +} + +NodeMatcherProperties ConstantValue( + const ::tensorflow::Input::Initializer& val) { + TF_CHECK_OK(val.status); + NodeMatcherProperties props; + props.set_constant_value(val.tensor); + return props; +} + +::testing::Matcher Const( + const ::tensorflow::Input::Initializer& val) { + return NodeWith(ConstantValue(val)); +} +} // namespace matchers + +Node* FindNodeByName(Graph* 
g, absl::string_view name) { + for (Node* n : g->nodes()) { + if (n->name() == name) { + return n; + } + } + + return nullptr; +} +} // namespace testing +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/node_matchers.h b/tensorflow/compiler/jit/node_matchers.h new file mode 100644 index 0000000000..0437a7e95c --- /dev/null +++ b/tensorflow/compiler/jit/node_matchers.h @@ -0,0 +1,197 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Provides a set of matchers for tensorflow nodes. +// +// Example usage: +// +// tensorflow::Node* node = ...; +// EXPECT_THAT(node, NodeWith(Name("name"), Op("op"), +// Inputs(NodeWith(Name("input"))))) +// +// Matchable node properties (the expressions that go inside NodeWith(...)) +// are: +// +// - Name(string): matches the node name exactly. We will probably need to +// have this take a string matcher soon in the future. +// +// - Op(string): matches the op exactly. +// +// - AssignedDevice(string): matches the assigned device exactly. +// +// - Inputs(): matches the list of non-control inputs to the node +// exactly (i.e. does not match a suffix or a prefix). +// +// - CtrlDeps(): matches the list of control dependences on the +// node exactly but in any order. +// +// - ConstantValue(tensorflow::Input::Initializer init): matches a Const node +// with the constant value `init`. 
Implies Op("Const"). +// +// Node properties may not be repeated in a single NodeWith(...) matcher. +// E.g. NodeWith(Op("Foo"), Op("Bar")) will CHECK-fail. Since ConstantValue +// implies Op("Const"), a single NodeWith matcher can't have both +// ConstantValue(...) and Op(...). + +#ifndef TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_ +#define TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_ + +#include +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { +namespace testing { +namespace matchers { + +namespace impl { + +// ----------------------------------------------------------------------------- +// Implementation details. + +// Properties that we match on for a particular Node. If a particular property +// is nullopt then any value for it is allowed. 
+class NodeMatcherProperties { + public: + using NodeSeqMatcher = std::vector<::testing::Matcher>; + + const absl::optional& name() const { return name_; } + const absl::optional& op() const { return op_; } + const absl::optional& assigned_device() const { + return assigned_device_; + } + const absl::optional& constant_value() const { + return constant_value_; + } + const absl::optional& input_nodes() const { + return input_nodes_; + } + const absl::optional& control_deps() const { + return control_deps_; + } + + void set_name(string name) { + DCHECK(IsEmpty()); + name_ = std::move(name); + } + + void set_op(string op) { + DCHECK(IsEmpty()); + op_ = std::move(op); + } + + void set_assigned_device(string assigned_device) { + DCHECK(IsEmpty()); + assigned_device_ = std::move(assigned_device); + } + + void set_constant_value(Tensor constant_value) { + DCHECK(IsEmpty()); + constant_value_ = std::move(constant_value); + op_ = "Const"; + } + + void set_input_nodes(NodeSeqMatcher input_nodes) { + DCHECK(IsEmpty()); + input_nodes_ = std::move(input_nodes); + } + + void set_control_deps(NodeSeqMatcher control_deps) { + DCHECK(IsEmpty()); + control_deps_ = std::move(control_deps); + } + + bool IsEmpty() const { + return !name().has_value() && !op().has_value() && + !input_nodes().has_value() && !control_deps().has_value(); + } + + private: + absl::optional name_; + absl::optional op_; + absl::optional assigned_device_; + absl::optional constant_value_; + absl::optional input_nodes_; + absl::optional control_deps_; +}; + +::testing::Matcher NodeWith( + absl::Span props); + +impl::NodeMatcherProperties Inputs( + absl::Span> inputs); + +impl::NodeMatcherProperties CtrlDeps( + absl::Span> control_deps); +} // namespace impl + +// ----------------------------------------------------------------------------- +// Public interface. + +// Matches a node with name `name`. +impl::NodeMatcherProperties Name(string name); + +// Matches a node with op `op`. 
+impl::NodeMatcherProperties Op(string op); + +// Matches a node with assigned device `assigned_device`. +impl::NodeMatcherProperties AssignedDevice(string assigned_device); + +// Matches a node with inputs `inputs`. +// +// `inputs` are ordered; `inputs`[i] must match input i. +template +impl::NodeMatcherProperties Inputs(Ts... inputs) { + return impl::Inputs({inputs...}); +} + +// Matches a node with control dependences `control_deps`. +// +// `control_deps` are unordered and will match the control deps of a node in any +// order. +template +impl::NodeMatcherProperties CtrlDeps(Ts... control_deps) { + return impl::CtrlDeps({control_deps...}); +} + +// Matches a constant node with value `val`. +impl::NodeMatcherProperties ConstantValue( + const ::tensorflow::Input::Initializer& val); + +// The main gmock matcher. See file comment for example usage. +template +::testing::Matcher NodeWith(Ts... args) { + std::array array = {args...}; + return impl::NodeWith(array); +} + +::testing::Matcher Const( + const ::tensorflow::Input::Initializer& val); +} // namespace matchers + +// If `g` has a node named `name` returns it, otherwise returns null. +Node* FindNodeByName(Graph* g, absl::string_view name); +} // namespace testing +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_ diff --git a/tensorflow/compiler/jit/node_matchers_test.cc b/tensorflow/compiler/jit/node_matchers_test.cc new file mode 100644 index 0000000000..93a8994307 --- /dev/null +++ b/tensorflow/compiler/jit/node_matchers_test.cc @@ -0,0 +1,179 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/node_matchers.h" + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/math_ops.h" + +namespace tensorflow { +namespace testing { +namespace { + +using ::testing::_; + +using testing::matchers::AssignedDevice; +using testing::matchers::ConstantValue; +using testing::matchers::CtrlDeps; +using testing::matchers::Inputs; +using testing::matchers::Name; +using testing::matchers::NodeWith; +using testing::matchers::Op; + +template +string Explain(const T& t, const M& m) { + ::testing::StringMatchResultListener listener; + EXPECT_THAT(t, ::testing::Not(m)); // For the error message. 
+ EXPECT_FALSE(m.MatchAndExplain(t, &listener)); + return listener.str(); +} + +TEST(NodeMatchers, CheckAgainstConstant) { + Scope root = Scope::NewRootScope().ExitOnError(); + Output placeholder = + ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT); + + EXPECT_THAT(placeholder.node(), NodeWith(Op("Placeholder"))); + EXPECT_THAT(placeholder.node(), NodeWith(Name("placeholder"))); + EXPECT_THAT(placeholder.node(), + NodeWith(Op("Placeholder"), Name("placeholder"))); + EXPECT_THAT(placeholder.node(), + NodeWith(Name("placeholder"), Op("Placeholder"))); + EXPECT_THAT(placeholder.node(), NodeWith(Inputs())); + EXPECT_THAT(placeholder.node(), + NodeWith(Op("Placeholder"), Name("placeholder"), Inputs())); + + EXPECT_EQ(Explain(placeholder.node(), NodeWith(Op("Add"))), + "\nexpected op Add but found Placeholder"); + EXPECT_EQ(Explain(placeholder.node(), NodeWith(Name("add"))), + "\nexpected name add but found placeholder"); + EXPECT_EQ(Explain(placeholder.node(), NodeWith(Inputs(NodeWith()))), + "\nexpected 1 inputs but node has 0"); +} + +TEST(NodeMatchers, CheckAgainstBinary) { + Scope root = Scope::NewRootScope().ExitOnError(); + + Output placeholder_a = + ops::Placeholder(root.WithOpName("placeholder_a"), DT_FLOAT); + Output placeholder_b = + ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT); + Output add = ops::Add(root.WithOpName("add"), placeholder_a, placeholder_b); + + EXPECT_THAT(add.node(), NodeWith(Op("Add"), Name("add"), + Inputs(NodeWith(Name("placeholder_a")), + NodeWith(Name("placeholder_b"))))); + + EXPECT_EQ(Explain(add.node(), NodeWith(Inputs())), + "\nexpected 0 inputs but node has 2"); + EXPECT_EQ( + Explain(add.node(), NodeWith(Inputs(NodeWith(Name("blah")), _))), + "\ninput 0 does not match expected:\nname: blah, \nsource does not match " + "expected name: blah\n\t\nexpected name blah but found placeholder_a"); + EXPECT_EQ( + Explain(add.node(), NodeWith(Inputs(_, NodeWith(Name("blah"))))), + "\ninput 1 does not match 
expected:\nname: blah, \nsource does not match " + "expected name: blah\n\t\nexpected name blah but found placeholder_b"); +} + +TEST(NodeMatchers, CheckControlDependence) { + Scope root = Scope::NewRootScope().ExitOnError(); + + Output placeholder_a = + ops::Placeholder(root.WithOpName("placeholder_a"), DT_FLOAT); + Output placeholder_b = + ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT); + Output placeholder_c = + ops::Placeholder(root.WithOpName("placeholder_c"), DT_FLOAT); + Output placeholder_d = + ops::Placeholder(root.WithOpName("placeholder_d"), DT_FLOAT); + + root.graph()->AddControlEdge(placeholder_a.node(), placeholder_c.node()); + root.graph()->AddControlEdge(placeholder_b.node(), placeholder_c.node()); + + EXPECT_THAT(placeholder_c.node(), + NodeWith(Name("placeholder_c"), + CtrlDeps(NodeWith(Name("placeholder_a")), + NodeWith(Name("placeholder_b"))))); + EXPECT_THAT(placeholder_d.node(), + NodeWith(Name("placeholder_d"), CtrlDeps())); + + EXPECT_EQ( + Explain(placeholder_c.node(), NodeWith(CtrlDeps())), + "ctrl_deps, which has 2 elements, does not match expected: is empty"); + EXPECT_EQ(Explain(placeholder_d.node(), NodeWith(CtrlDeps(NodeWith()))), + "ctrl_deps does not match expected: has 1 element and that element " + "is any node"); +} + +TEST(NodeMatchers, ConstVaulue) { + Scope root = Scope::NewRootScope().ExitOnError(); + Output placeholder = + ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT); + Output const_0d = ops::Const(root.WithOpName("const_0d"), 42); + + Output const_2d = ops::Const(root.WithOpName("const_2d"), {{1, 2}, {4, 3}}); + + EXPECT_THAT(const_0d.node(), NodeWith(ConstantValue(42))); + EXPECT_THAT(const_0d.node(), NodeWith(ConstantValue(42), Name("const_0d"))); + + EXPECT_THAT(const_2d.node(), NodeWith(ConstantValue({{1, 2}, {4, 3}}))); + + EXPECT_EQ(Explain(placeholder.node(), NodeWith(ConstantValue(42))), + "\nexpected op Const but found Placeholder"); + EXPECT_EQ( + Explain(const_0d.node(), 
NodeWith(ConstantValue(43))), + "\nmismatch in constant tensor at index 0 expected = 43 actual = 42"); + EXPECT_EQ( + Explain(const_0d.node(), NodeWith(ConstantValue({{1, 2}, {4, 3}}))), + "\nwas looking for tensor with 4 elements, found tensor with 1 elements"); + EXPECT_EQ( + Explain(const_2d.node(), NodeWith(ConstantValue(42))), + "\nwas looking for tensor with 1 elements, found tensor with 4 elements"); +} + +TEST(NodeMatchers, AssignedDevice) { + Scope root = Scope::NewRootScope().ExitOnError(); + + Output placeholder_a = + ops::Placeholder(root.WithOpName("placeholder_a"), DT_FLOAT); + Output placeholder_b = + ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT); + + Output assigned_add = + ops::Add(root.WithOpName("assigned_add"), placeholder_a, placeholder_b); + assigned_add.node()->set_assigned_device_name( + "/job:localhost/replica:0/task:0/device:CPU:0"); + + Output unassigned_add = + ops::Add(root.WithOpName("unassigned_add"), placeholder_a, placeholder_b); + + EXPECT_THAT( + assigned_add.node(), + NodeWith(AssignedDevice("/job:localhost/replica:0/task:0/device:CPU:0"))); + EXPECT_THAT(unassigned_add.node(), NodeWith(AssignedDevice(""))); + + EXPECT_EQ(Explain(unassigned_add.node(), + NodeWith(AssignedDevice( + "/job:localhost/replica:0/task:0/device:CPU:0"))), + "\nexpected assigned_device " + "/job:localhost/replica:0/task:0/device:CPU:0 but found \"\""); +} + +} // namespace +} // namespace testing +} // namespace tensorflow -- GitLab From a6ee64cd216b3ac440262e1f4ec7872fe7026df6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 16 Sep 2018 13:38:24 -0700 Subject: [PATCH 0239/1357] Conditionally allow changing a non-fusion computation root_instruction shape. 
PiperOrigin-RevId: 213191899 --- tensorflow/compiler/xla/service/hlo_computation.cc | 6 +++--- tensorflow/compiler/xla/service/hlo_computation.h | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 8c6903d766..601a008d9f 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -279,11 +279,11 @@ Status HloComputation::RemoveInstruction(HloInstruction* instruction) { return Status::OK(); } -void HloComputation::set_root_instruction( - HloInstruction* new_root_instruction) { +void HloComputation::set_root_instruction(HloInstruction* new_root_instruction, + bool accept_different_shape) { // The shape of the root (ignoring layout) is an invariant of the computation // for non-fusion cases. - if (!IsFusionComputation()) { + if (!IsFusionComputation() && !accept_different_shape) { CHECK(ShapeUtil::Compatible(new_root_instruction->shape(), root_instruction_->shape())) << new_root_instruction->shape() << " is incompatible with " diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 91c5234a6f..a880e9ab30 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -134,9 +134,11 @@ class HloComputation { Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction); // Set the root of the computation to the given instruction. The instruction - // must have already been added to the computation and have the same shape as - // the result of the computation for non fusion computations. - void set_root_instruction(HloInstruction* new_root_instruction); + // must have already been added to the computation. 
In addition it must have + // the same shape as the result of the computation for non fusion + // computations, except if accept_different_shape is set to true. + void set_root_instruction(HloInstruction* new_root_instruction, + bool accept_different_shape = false); // Return the root instruction of the computation. The root instruction is the // instruction which produces the output of the computation. -- GitLab From 79458017805905a7840ec15039d08ac010ecb9d3 Mon Sep 17 00:00:00 2001 From: Jenny Sahng Date: Mon, 17 Sep 2018 10:21:11 +1200 Subject: [PATCH 0240/1357] Update broken link to intro on ADAGRAD --- tensorflow/contrib/optimizer_v2/adagrad.py | 2 +- tensorflow/python/training/adagrad.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/optimizer_v2/adagrad.py b/tensorflow/contrib/optimizer_v2/adagrad.py index c333d1e089..d17d6772e2 100644 --- a/tensorflow/contrib/optimizer_v2/adagrad.py +++ b/tensorflow/contrib/optimizer_v2/adagrad.py @@ -31,7 +31,7 @@ class AdagradOptimizer(optimizer_v2.OptimizerV2): See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) or this - [intro](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf). + [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf). """ def __init__(self, learning_rate, initial_accumulator_value=0.1, diff --git a/tensorflow/python/training/adagrad.py b/tensorflow/python/training/adagrad.py index 3508b98475..cc0da26b27 100644 --- a/tensorflow/python/training/adagrad.py +++ b/tensorflow/python/training/adagrad.py @@ -34,7 +34,7 @@ class AdagradOptimizer(optimizer.Optimizer): See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) or this - [intro](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf). + [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf). 
""" def __init__(self, learning_rate, initial_accumulator_value=0.1, -- GitLab From 297fafbe9464372e1641c0f376f47569a23aeffa Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 17 Sep 2018 00:41:07 +0000 Subject: [PATCH 0241/1357] Support gradient_multipliers as tensor for optimize_loss This fix tries to address the issue raised in 22295 where gradient_multipliers for tf.contrib.layers.optimize_loss() does not support tensor as input. This fix update the optimize_loss to allow gradient_multipliers passed as dict of tensors. This fix fixes 22295. Signed-off-by: Yong Tang --- tensorflow/contrib/layers/python/layers/optimizers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index 69d927e1b3..2ac58597c2 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -433,8 +433,7 @@ def _multiply_gradients(grads_and_vars, gradient_multipliers): if (grad is not None and (var in gradient_multipliers or var.name in gradient_multipliers)): key = var if var in gradient_multipliers else var.name - multiplier = constant_op.constant( - gradient_multipliers[key], dtype=dtypes.float32) + multiplier = gradient_multipliers[key] if isinstance(grad, ops.IndexedSlices): grad_values = grad.values * multiplier grad = ops.IndexedSlices(grad_values, grad.indices, grad.dense_shape) -- GitLab From 921186571f792562fa234f7f0a7516b67e867930 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 17 Sep 2018 00:47:45 +0000 Subject: [PATCH 0242/1357] Add test cases to allow gradient_multipliers passed as tensor Signed-off-by: Yong Tang --- .../layers/python/layers/optimizers_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py index 29dede2a49..6a7df23011 
100644 --- a/tensorflow/contrib/layers/python/layers/optimizers_test.py +++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py @@ -250,6 +250,24 @@ class OptimizersTest(test.TestCase): self.assertAlmostEqual(var_value, 6.5, 4) self.assertEqual(global_step_value, 1) + def testGradientMultiplyTensor(self): + with self.cached_session() as session: + x, var, loss, global_step = _setup_model() + v = array_ops.placeholder(dtypes.float32, []) + train = optimizers_lib.optimize_loss( + loss, + global_step, + learning_rate=0.1, + optimizer="SGD", + gradient_multipliers={var: v}) + variables.global_variables_initializer().run() + session.run(train, feed_dict={x: 5, v: 7.}) + var_value, global_step_value = session.run([var, global_step]) + # var(0) = 10, x = 5, var(0)/dx = 5, + # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx + self.assertAlmostEqual(var_value, 6.5, 4) + self.assertEqual(global_step_value, 1) + def testIgnoreVariablesWithNoGradients(self): _, _, loss, global_step = _setup_model() -- GitLab From 8e6599d2d7b54fe8fba37ad1cc045b62bd7e50e5 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 17 Sep 2018 01:06:54 +0000 Subject: [PATCH 0243/1357] Allow different dtype of Tensor Signed-off-by: Yong Tang --- tensorflow/contrib/layers/python/layers/optimizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index 2ac58597c2..d92de3b58c 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -438,6 +438,6 @@ def _multiply_gradients(grads_and_vars, gradient_multipliers): grad_values = grad.values * multiplier grad = ops.IndexedSlices(grad_values, grad.indices, grad.dense_shape) else: - grad *= multiplier + grad *= math_ops.cast(multiplier, grad.dtype) multiplied_grads_and_vars.append((grad, var)) return multiplied_grads_and_vars -- GitLab From 
7d8316fb85b21546e3df2aef701f1cfa9f92b6ba Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 17 Sep 2018 01:07:16 +0000 Subject: [PATCH 0244/1357] Add additional test cases Signed-off-by: Yong Tang --- .../layers/python/layers/optimizers_test.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py index 6a7df23011..b4d1239e76 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers_test.py +++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py @@ -250,7 +250,7 @@ class OptimizersTest(test.TestCase): self.assertAlmostEqual(var_value, 6.5, 4) self.assertEqual(global_step_value, 1) - def testGradientMultiplyTensor(self): + def testGradientMultiplyInt32Tensor(self): with self.cached_session() as session: x, var, loss, global_step = _setup_model() v = array_ops.placeholder(dtypes.float32, []) @@ -268,6 +268,24 @@ class OptimizersTest(test.TestCase): self.assertAlmostEqual(var_value, 6.5, 4) self.assertEqual(global_step_value, 1) + def testGradientMultiplyInt64Tensor(self): + with self.cached_session() as session: + x, var, loss, global_step = _setup_model() + v = array_ops.placeholder(dtypes.float64, []) + train = optimizers_lib.optimize_loss( + loss, + global_step, + learning_rate=0.1, + optimizer="SGD", + gradient_multipliers={var: v}) + variables.global_variables_initializer().run() + session.run(train, feed_dict={x: 5, v: 7.}) + var_value, global_step_value = session.run([var, global_step]) + # var(0) = 10, x = 5, var(0)/dx = 5, + # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx + self.assertAlmostEqual(var_value, 6.5, 4) + self.assertEqual(global_step_value, 1) + def testIgnoreVariablesWithNoGradients(self): _, _, loss, global_step = _setup_model() -- GitLab From 2501870be0df24ca0e191710b1de139e195616a3 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Sun, 16 Sep 2018 19:50:17 
-0700 Subject: [PATCH 0245/1357] Fix some typos in the doc for XlaDynamicSlice phawkins@ suggested these in cr/212715067 but I accidentally made the changes in another client. PiperOrigin-RevId: 213208811 --- tensorflow/compiler/tf2xla/ops/xla_ops.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index 02363500ef..733eeed3c6 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -121,8 +121,8 @@ Wraps the XLA DynamicSlice operator, documented at DynamicSlice extracts a sub-array from the input array at dynamic start_indices. The size of the slice in each dimension is passed in size_indices, which specify the end point of exclusive slice intervals in each -dimension -- [start, start + size). The shape of start_indices must be rank == -1, with dimension size equal to the rank of operand. +dimension -- [start, start + size). The shape of start_indices must have rank 1, +with dimension size equal to the rank of operand. input: A `Tensor` of type T. @@ -131,7 +131,8 @@ start_indices: Rank 1 tensor of N integers containing the starting indices of start_indices: List of N integers containing the slice size for each dimension. Each value must be strictly greater than zero, and start + size - must be less + must be less than or equal to the size of the dimension to avoid + implementation defined behavior. )doc"); REGISTER_OP("XlaDynamicUpdateSlice") -- GitLab From 791f48d3aaaa875c23de1484f7ef1d0656fbabca Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Sun, 16 Sep 2018 20:11:53 -0700 Subject: [PATCH 0246/1357] Improve TFLite iOS doc. 
PiperOrigin-RevId: 213210253 --- tensorflow/contrib/lite/g3doc/ios.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/g3doc/ios.md b/tensorflow/contrib/lite/g3doc/ios.md index a83d2c8fec..3b9fcca811 100644 --- a/tensorflow/contrib/lite/g3doc/ios.md +++ b/tensorflow/contrib/lite/g3doc/ios.md @@ -1,5 +1,10 @@ -# TensorFlow Lite for iOS +# Build TensorFlow Lite for iOS + +This document describes how to build TensorFlow Lite iOS library. If you just +want to use it, the easiest way is using the TensorFlow Lite CocoaPod releases. +See [TensorFlow Lite iOS Demo](demo_ios.md) for examples. + ## Building -- GitLab From b6a8ade2ce2b42ed4bed67aee40da4c1705e01fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 16 Sep 2018 20:44:35 -0700 Subject: [PATCH 0247/1357] Add ZerosLike to schema. PiperOrigin-RevId: 213212445 --- tensorflow/contrib/lite/builtin_ops.h | 1 + .../lite/core/api/flatbuffer_conversions.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 124 +++++++++++++++++- 5 files changed, 126 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 5e97b777fc..7f33942c90 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -118,6 +118,7 @@ typedef enum { kTfLiteBuiltinFloorDiv = 90, kTfLiteBuiltinReduceAny = 91, kTfLiteBuiltinSquare = 92, + kTfLiteBuiltinZerosLike = 93, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index f4d2839b1b..ceb2bbd612 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -618,6 +618,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, 
case BuiltinOperator_LOGICAL_NOT: case BuiltinOperator_FLOOR_DIV: case BuiltinOperator_SQUARE: + case BuiltinOperator_ZEROS_LIKE: break; } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index f814b90d66..3b6a81ffde 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -673,6 +673,7 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_FLOOR_DIV: case tflite::BuiltinOperator_REDUCE_ANY: case tflite::BuiltinOperator_SQUARE: + case tflite::BuiltinOperator_ZEROS_LIKE: logError("Op code %d is currently not delegated to NNAPI", builtin); return kTfLiteError; break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index f0db22d581..4c339317cb 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -174,6 +174,7 @@ enum BuiltinOperator : byte { FLOOR_DIV = 90, REDUCE_ANY = 91, SQUARE = 92, + ZEROS_LIKE = 93, } // Options for the builtin operators. @@ -244,6 +245,7 @@ union BuiltinOptions { UnpackOptions, FloorDivOptions, SquareOptions, + ZerosLikeOptions, } enum Padding : byte { SAME, VALID } @@ -588,6 +590,9 @@ table FloorDivOptions { table SquareOptions { } +table ZerosLikeOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. 
table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 8c086a5e67..03c227f987 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -229,6 +229,9 @@ struct FloorDivOptionsT; struct SquareOptions; struct SquareOptionsT; +struct ZerosLikeOptions; +struct ZerosLikeOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -387,11 +390,12 @@ enum BuiltinOperator { BuiltinOperator_FLOOR_DIV = 90, BuiltinOperator_REDUCE_ANY = 91, BuiltinOperator_SQUARE = 92, + BuiltinOperator_ZEROS_LIKE = 93, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_SQUARE + BuiltinOperator_MAX = BuiltinOperator_ZEROS_LIKE }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[92] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[93] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -484,7 +488,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[92] { BuiltinOperator_REDUCE_MIN, BuiltinOperator_FLOOR_DIV, BuiltinOperator_REDUCE_ANY, - BuiltinOperator_SQUARE + BuiltinOperator_SQUARE, + BuiltinOperator_ZEROS_LIKE }; return values; } @@ -584,6 +589,7 @@ inline const char **EnumNamesBuiltinOperator() { "FLOOR_DIV", "REDUCE_ANY", "SQUARE", + "ZEROS_LIKE", nullptr }; return names; @@ -662,11 +668,12 @@ enum BuiltinOptions { BuiltinOptions_UnpackOptions = 64, BuiltinOptions_FloorDivOptions = 65, BuiltinOptions_SquareOptions = 66, + BuiltinOptions_ZerosLikeOptions = 67, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_SquareOptions + BuiltinOptions_MAX = BuiltinOptions_ZerosLikeOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[67] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[68] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -734,7 +741,8 @@ inline 
BuiltinOptions (&EnumValuesBuiltinOptions())[67] { BuiltinOptions_LogicalNotOptions, BuiltinOptions_UnpackOptions, BuiltinOptions_FloorDivOptions, - BuiltinOptions_SquareOptions + BuiltinOptions_SquareOptions, + BuiltinOptions_ZerosLikeOptions }; return values; } @@ -808,6 +816,7 @@ inline const char **EnumNamesBuiltinOptions() { "UnpackOptions", "FloorDivOptions", "SquareOptions", + "ZerosLikeOptions", nullptr }; return names; @@ -1086,6 +1095,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1645,6 +1658,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_SquareOptions ? reinterpret_cast(value) : nullptr; } + ZerosLikeOptionsT *AsZerosLikeOptions() { + return type == BuiltinOptions_ZerosLikeOptions ? + reinterpret_cast(value) : nullptr; + } + const ZerosLikeOptionsT *AsZerosLikeOptions() const { + return type == BuiltinOptions_ZerosLikeOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -5888,6 +5909,46 @@ inline flatbuffers::Offset CreateSquareOptions( flatbuffers::Offset CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct ZerosLikeOptionsT : public flatbuffers::NativeTable { + typedef ZerosLikeOptions TableType; + ZerosLikeOptionsT() { + } +}; + +struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ZerosLikeOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + ZerosLikeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ZerosLikeOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateZerosLikeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ZerosLikeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; 
BuiltinOperator builtin_code; @@ -6219,6 +6280,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const SquareOptions *builtin_options_as_SquareOptions() const { return builtin_options_type() == BuiltinOptions_SquareOptions ? static_cast(builtin_options()) : nullptr; } + const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { + return builtin_options_type() == BuiltinOptions_ZerosLikeOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -6514,6 +6578,10 @@ template<> inline const SquareOptions *Operator::builtin_options_as inline const ZerosLikeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ZerosLikeOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -8782,6 +8850,29 @@ inline flatbuffers::Offset CreateSquareOptions(flatbuffers::FlatB _fbb); } +inline ZerosLikeOptionsT *ZerosLikeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ZerosLikeOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ZerosLikeOptions::UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ZerosLikeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateZerosLikeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ZerosLikeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateZerosLikeOptions( + 
_fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -9235,6 +9326,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -9517,6 +9612,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -9787,6 +9886,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateSquareOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(value); + return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -10057,6 +10160,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new SquareOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_ZerosLikeOptions: { + value = new ZerosLikeOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -10394,6 +10501,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From 1f7e51560e26992e8e56f6426525c1df1e53b974 Mon Sep 17 00:00:00 2001 From: Pan Daoxin Date: Mon, 17 Sep 2018 13:42:15 +0800 Subject: [PATCH 0248/1357] Some changes for commit. 
--- tensorflow/core/kernels/BUILD | 5 +-- tensorflow/core/kernels/mkl_slice_op.cc | 46 ++++++++++++------------- tensorflow/core/ops/array_ops.cc | 40 +++++++++++++++++++-- 3 files changed, 62 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 2582814d08..f5682b6e13 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6331,10 +6331,7 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_slice_op", prefix = "mkl_slice_op", - deps = ARRAY_DEPS + if_mkl([ - "//third_party/mkl:intel_binary_blob", - "@mkl_dnn", - ]), + deps = ARRAY_DEPS + mkl_deps(), ) tf_mkl_kernel_library( diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc index 86fb572478..20c4921390 100644 --- a/tensorflow/core/kernels/mkl_slice_op.cc +++ b/tensorflow/core/kernels/mkl_slice_op.cc @@ -48,7 +48,7 @@ gtl::InlinedVector IntTensorToInt64Vec(const Tensor& tensor) { out.push_back(tensor.flat()(i)); } } else { - LOG(FATAL) << "begin must be either int32 or int64"; + LOG(FATAL) << "tensor must be either int32 or int64"; } return out; } @@ -59,6 +59,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice; // A version of SharedValidation (slice_op.h) written for input that is in // either Mkl layout or Tensorflow layout. +// A shared code to validate input shapes and check for identity, which is not dependent on the type of T. +// We do this to reduce code size by not duplicating all this for all T (float, double, int32, etc.) static void ValidateMklInputs(OpKernelContext* context, bool* is_identity, gtl::InlinedVector* begin, gtl::InlinedVector* size) { @@ -81,19 +83,19 @@ static void ValidateMklInputs(OpKernelContext* context, bool* is_identity, TensorShape input_tf_shape = input_mkl_shape.IsMklTensor() ? 
input_mkl_shape.GetTfShape() : input.shape(); + const int input_dims = input_tf_shape.dims(); OP_REQUIRES( context, context->op_kernel().IsLegacyVector(begin_tensor.shape()) && context->op_kernel().IsLegacyVector(size_tensor.shape()) && - begin_tensor.NumElements() == input_tf_shape.dims() && - size_tensor.NumElements() == input_tf_shape.dims(), + begin_tensor.NumElements() == input_dims && + size_tensor.NumElements() == input_dims, errors::InvalidArgument( "Expected begin and size arguments to be 1-D tensors of size ", - input_tf_shape.dims(), ", but got shapes ", + input_dims, ", but got shapes ", begin_tensor.shape().DebugString(), " and ", size_tensor.shape().DebugString(), " instead.")); - const int input_dims = input_tf_shape.dims(); *begin = IntTensorToInt64Vec(begin_tensor); *size = IntTensorToInt64Vec(size_tensor); for (int i = 0; i < input_dims; ++i) { @@ -152,7 +154,6 @@ static void CheckCommonCasesForMklInputs(OpKernelContext* context, // output. AllocateOutputSetMklShape(context, 0, input_mkl_shape); *done = true; - return; } } @@ -172,8 +173,8 @@ class MklDnnSliceOp : public OpKernel { CheckCommonCasesForMklInputs(context, &begin, &size, &done); if (!context->status().ok() || done == true) return; - // MKL-DNN does not have this limitation of supporting less than 8 dimension - // tensor. But we are mimicking functionality of Eigen Slice op for CPU. + // Though MKL-DNN supports more than 8 dimension and less than 12 dimension tensor. + // But we are mimicking functionality of Eigen Slice op for CPU. if (begin.size() >= 8) { OP_REQUIRES( context, false, @@ -181,7 +182,6 @@ class MklDnnSliceOp : public OpKernel { } ComputeMklDnnSlice(context, begin, size); - return; } private: @@ -203,7 +203,7 @@ class MklDnnSliceOp : public OpKernel { // 2. create view primitive descriptor in_submem_pd based on in_mem_pd, // initial offsets, and sub-sizes // 3. 
create memory primitive descriptor out_mem_pd and memory primitive - // out_mem_p for the output (the logical sizes should much sub-sizes + // out_mem_p for the output (the logical sizes should match sub-sizes // used in step 2, but the format might be arbitrary) // 4. create reorder primitive descriptor reorder_pd based on in_submem_pd // and out_mem_pd @@ -232,6 +232,9 @@ class MklDnnSliceOp : public OpKernel { Tensor* output_tensor = nullptr; MklDnnShape output_mkl_shape; + + // If no dimension is selected in slice, the result should be empty. + // Just return an empty output tensor, and a dummy Mkl-shape tensor. if (empty) { // for empty dims auto shape_to = MklDnnDimsToTFShape(size_dims); AllocateOutputSetMklShape(context, 0, &output_tensor, shape_to, @@ -250,23 +253,19 @@ class MklDnnSliceOp : public OpKernel { auto input_tf_format = MklDnnDataFormatToTFDataFormat(input_mkl_format); begin_dims = MklDnnDimsInNCHW(begin_dims, input_tf_format); size_dims = MklDnnDimsInNCHW(size_dims, input_tf_format); - } - - // Initialize input dimensions and strides to be used when input is not in - // MklDnn layout. - memory::dims input_dims, input_strides; - if (!input_mkl_shape.IsMklTensor()) { + auto input_md = input_mkl_shape.GetMklLayout(); + src.SetUsrMem(input_md, &input_tensor); + } else { + // Initialize input dimensions and strides to be used when input is not in + // MklDnn layout. + memory::dims input_dims, input_strides; input_dims = TFShapeToMklDnnDims(input_tensor.shape()); input_strides = CalculateTFStrides(input_dims); + // Create input memory descriptor. + auto input_md = MklDnnData::CreateBlockedMemDesc(input_dims, input_strides); + src.SetUsrMem(input_md, &input_tensor); } - // Create input memory descriptor. - auto input_md = - input_mkl_shape.IsMklTensor() - ? 
input_mkl_shape.GetMklLayout() - : MklDnnData::CreateBlockedMemDesc(input_dims, input_strides); - src.SetUsrMem(input_md, &input_tensor); - // Step 2 - create view primitive descriptor auto view_pd = view::primitive_desc(src.GetUsrMemPrimDesc(), size_dims, begin_dims) @@ -291,6 +290,7 @@ class MklDnnSliceOp : public OpKernel { reorder::primitive_desc(view_pd, output.GetUsrMemPrimDesc()); // Step 5 - create reorder primitive itself. net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *output.GetUsrMem())); + // Execute the reorder primitive. stream(stream::kind::eager).submit(net).wait(); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + ", message: " + diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 18cc529a9b..2dec430710 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -1681,8 +1681,8 @@ REGISTER_OP("_MklSlice") TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &begin_value)); // NOTE(mrry): We can't use `MakeShapeFromShapeTensor` for `sizes` because - // it might contain -1, which can't be represented -1 in the ShapeHandle - // would meqan "unknown". + // it might contain -1, which can't be represented. (-1 in the ShapeHandle + // would mean "unknown".) const Tensor* sizes_value = c->input_tensor(3); if (sizes_value != nullptr) { @@ -1698,7 +1698,43 @@ REGISTER_OP("_MklSlice") TF_RETURN_IF_ERROR( SliceHelper(c, begin_value, sizes_value, &dims)); } + + c->set_output(0, c->MakeShape(dims)); + return Status::OK(); + } else { + // In case `sizes` is not available (`sizes_value` is null), + // we could try to use `MakeShapeFromShapeTensor` here. + // If sizes contain -1, we will simply consider it as `Unknown`. + // This is less than ideal but still an improvement of shape inference. 
+ // The following is an example that returns [None, 1, None] with this + // code path: + // z = tf.zeros((1, 2, 3)) + // m = tf.slice(z, [0, 0, 0], [tf.constant(1) + 0, 1, -1]) + // m.get_shape().as_list() + ShapeHandle sizes_value; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(2, &sizes_value)); + if (c->RankKnown(sizes_value)) { + TF_RETURN_IF_ERROR( + c->WithRank(begin_value, c->Rank(sizes_value), &begin_value)); + std::vector dims; + dims.reserve(c->Rank(sizes_value)); + for (int i = 0; i < c->Rank(sizes_value); ++i) { + dims.emplace_back(c->Dim(sizes_value, i)); + } + c->set_output(0, c->MakeShape(dims)); + return Status::OK(); + } + + // We might know the rank of the input. + if (c->RankKnown(input)) { + c->set_output(0, c->UnknownShapeOfRank(c->Rank(input))); + return Status::OK(); + } else { + return shape_inference::UnknownShape(c); + } } + + return Status::OK(); }); #endif -- GitLab From f1d42c8967410db1e08c0b6d62dc1fc4844165a8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 00:39:12 -0700 Subject: [PATCH 0249/1357] Implement ZerosLike PiperOrigin-RevId: 213227615 --- tensorflow/contrib/lite/build_def.bzl | 1 + .../lite/g3doc/tf_ops_compatibility.md | 11 +++ tensorflow/contrib/lite/kernels/BUILD | 15 ++++ tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/kernels/zeros_like.cc | 73 +++++++++++++++++ .../contrib/lite/kernels/zeros_like_test.cc | 78 +++++++++++++++++++ .../contrib/lite/testing/generate_examples.py | 25 ++++++ .../contrib/lite/toco/export_tensorflow.cc | 17 ++++ .../propagate_fixed_sizes.cc | 1 + .../contrib/lite/toco/import_tensorflow.cc | 1 + tensorflow/contrib/lite/toco/model.h | 11 +++ .../contrib/lite/toco/tflite/operator.cc | 2 + .../contrib/lite/toco/tflite/operator_test.cc | 2 + tensorflow/contrib/lite/toco/tooling_util.cc | 1 + 14 files changed, 240 insertions(+) create mode 100644 tensorflow/contrib/lite/kernels/zeros_like.cc create mode 100644 
tensorflow/contrib/lite/kernels/zeros_like_test.cc diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 52b994ee92..fc4d9b4f17 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -294,6 +294,7 @@ def generated_test_models(): #"transpose_conv", # disabled due to b/111213074 "unpack", "where", + "zeros_like", ] def generated_test_conversion_modes(): diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 8660d29855..b0dfb0fed1 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -866,6 +866,17 @@ Outputs { } ``` +**ZEROS_LIKE** + +``` +Inputs { + 0: a tensor +} +Outputs { + 0: A tensor of the same shape and type as x but filled with zeros +} +``` + And these are TensorFlow Lite operations that are present but not ready for custom models yet: diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 40f28aeab4..f52d29ea76 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -223,6 +223,7 @@ cc_library( "unidirectional_sequence_lstm.cc", "unidirectional_sequence_rnn.cc", "unpack.cc", + "zeros_like.cc", ], hdrs = [ ], @@ -1284,6 +1285,20 @@ tf_cc_test( ], ) +tf_cc_test( + name = "zeros_like_test", + size = "small", + srcs = ["zeros_like_test.cc"], + tags = ["tflite_not_portable_ios"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 14296d3a9f..6e35799c35 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ 
b/tensorflow/contrib/lite/kernels/register.cc @@ -119,6 +119,7 @@ TfLiteRegistration* Register_LOGICAL_NOT(); TfLiteRegistration* Register_UNPACK(); TfLiteRegistration* Register_FLOOR_DIV(); TfLiteRegistration* Register_SQUARE(); +TfLiteRegistration* Register_ZEROS_LIKE(); TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) { context->ReportError( @@ -245,6 +246,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK()); AddBuiltin(BuiltinOperator_FLOOR_DIV, Register_FLOOR_DIV()); AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE()); + AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/contrib/lite/kernels/zeros_like.cc b/tensorflow/contrib/lite/kernels/zeros_like.cc new file mode 100644 index 0000000000..cce5240a9b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/zeros_like.cc @@ -0,0 +1,73 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/lite/c/c_api_internal.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace zeros_like { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + output->type = input->type; + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const int num_elements = NumElements(input); + switch (input->type) { + case kTfLiteInt64: + memset(GetTensorData(output), 0, num_elements * sizeof(int64_t)); + break; + case kTfLiteInt32: + memset(GetTensorData(output), 0, num_elements * sizeof(int32_t)); + break; + case kTfLiteFloat32: + memset(GetTensorData(output), 0, num_elements * sizeof(float)); + break; + default: + context->ReportError(context, + "ZerosLike only currently supports int64, int32, " + "and float32, got %d.", + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace zeros_like + +TfLiteRegistration* Register_ZEROS_LIKE() { + static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, + zeros_like::Prepare, zeros_like::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/zeros_like_test.cc b/tensorflow/contrib/lite/kernels/zeros_like_test.cc new file mode 100644 
index 0000000000..d3382d1d5b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/zeros_like_test.cc @@ -0,0 +1,78 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class ZerosLikeOpModel : public SingleOpModel { + public: + explicit ZerosLikeOpModel(const TensorData& input) { + input_ = AddInput(input); + output_ = AddOutput(input); + SetBuiltinOp(BuiltinOperator_ZEROS_LIKE, BuiltinOptions_ZerosLikeOptions, + CreateZerosLikeOptions(builder_).Union()); + BuildInterpreter({GetShape(input_)}); + } + + int input() { return input_; } + int output() { return output_; } + + protected: + int input_; + int output_; +}; + +TEST(ZerosLikeOpModel, ZerosLikeFloat) { + ZerosLikeOpModel m({TensorType_FLOAT32, {2, 3}}); + m.PopulateTensor(m.input(), {-2.0, -1.0, 0.0, 1.0, 2.0, 3.0}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({0.0, 0.0, 0.0, 0.0, 0.0, 0.0})); + EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({2, 3})); +} + +TEST(ZerosLikeOpModel, ZerosLikeInt32) { + ZerosLikeOpModel m({TensorType_INT32, {1, 2, 2, 1}}); + 
m.PopulateTensor(m.input(), {-2, -1, 0, 3}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({0, 0, 0, 0})); + EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 2, 2, 1})); +} + +TEST(ZerosLikeOpModel, ZerosLikeInt64) { + ZerosLikeOpModel m({TensorType_INT64, {1, 2, 2, 1}}); + m.PopulateTensor(m.input(), {-2, -1, 0, 3}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({0, 0, 0, 0})); + EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 2, 2, 1})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 3754b58b23..014c80b5ef 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -2834,6 +2834,31 @@ def make_neg_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_zeros_like_tests(zip_path): + """Make a set of tests to do zeros_like.""" + + test_parameters = [{ + "input_dtype": [tf.float32, tf.int32, tf.int64], + "input_shape": [[], [1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], + }] + + def build_graph(parameters): + """Build the zeros_like op testing graph.""" + input_tensor = tf.placeholder( + dtype=parameters["input_dtype"], + name="input", + shape=parameters["input_shape"]) + out = tf.zeros_like(input_tensor) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + values = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [values], sess.run(outputs, feed_dict=dict(zip(inputs, [values]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def _make_elementwise_tests(op): """Make a set of tests to do element-wise operations.""" 
diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index b52a79282c..3a534300ae 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -1968,6 +1968,19 @@ void ConvertUnpackOperator(const Model& model, const UnpackOperator& src_op, (*unpack_op->mutable_attr())["axis"].set_i(src_op.axis); } +void ConvertZerosLikeOperator(const Model& model, + const TensorFlowZerosLikeOperator& src_op, + const char* op_name, GraphDef* tensorflow_graph) { + tensorflow::NodeDef* zeros_like_op = tensorflow_graph->add_node(); + zeros_like_op->set_op(op_name); + zeros_like_op->set_name(src_op.outputs[0]); + DCHECK_EQ(src_op.inputs.size(), 1); + *zeros_like_op->add_input() = src_op.inputs[0]; + const tensorflow::DataType data_type = + GetTensorFlowDataType(model, src_op.inputs[0]); + (*zeros_like_op->mutable_attr())["T"].set_type(data_type); +} + void ConvertOperator(const Model& model, const Operator& src_op, GraphDef* tensorflow_graph) { if (src_op.fused_activation_function != FusedActivationFunctionType::kNone) { @@ -2233,6 +2246,10 @@ void ConvertOperator(const Model& model, const Operator& src_op, } else if (src_op.type == OperatorType::kUnpack) { ConvertUnpackOperator(model, static_cast(src_op), "Unpack", tensorflow_graph); + } else if (src_op.type == OperatorType::kZerosLike) { + ConvertZerosLikeOperator( + model, static_cast(src_op), + "ZerosLike", tensorflow_graph); } else { LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type); } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index f103bb94ae..6c72e20121 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1655,6 +1655,7 @@ bool 
PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kLogicalAnd: case OperatorType::kLogicalNot: case OperatorType::kLogicalOr: + case OperatorType::kZerosLike: ProcessSimpleOperator(model, op, 0); break; case OperatorType::kGather: diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 2ccfd36b7c..4c678e7e73 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -2065,6 +2065,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() { {"TopKV2", ConvertTopKV2Operator}, {"Transpose", ConvertSimpleOperator}, {"Unpack", ConvertUnpackOperator}, + {"ZerosLike", ConvertSimpleOperator}, }); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 164b70f2df..0fd2732973 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -150,6 +150,7 @@ enum class OperatorType : uint8 { kLogicalOr, kCTCBeamSearchDecoder, kUnpack, + kZerosLike, }; // Helper to deal with TensorFlow arrays using a different ordering of @@ -1849,6 +1850,16 @@ struct UnpackOperator : Operator { ArrayDataType dtype = ArrayDataType::kNone; }; +// ZerosLike operator: +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: tf.zeros_like +struct TensorFlowZerosLikeOperator : Operator { + TensorFlowZerosLikeOperator() : Operator(OperatorType::kZerosLike) {} +}; + // Alloc's are used for transient arrays only. An Alloc specifies which interval // of the "transient_data" workspace buffer passed to inference functions, is to // be used for the transient array at hand. 
The 'start' and 'end' values are diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 1061e7c7c4..c59a28b864 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -1500,6 +1500,8 @@ std::vector> BuildOperatorList( "RSQRT", OperatorType::kRsqrt)); ops.push_back(MakeUnique>( "SQUARE", OperatorType::kSquare)); + ops.push_back(MakeUnique>( + "ZEROS_LIKE", OperatorType::kZerosLike)); return ops; } diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 72e50a9aed..0bc591e647 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -146,6 +146,8 @@ TEST_F(OperatorTest, SimpleOperators) { CheckSimpleOperator("FLOOR_DIV", OperatorType::kFloorDiv); CheckSimpleOperator("SQUARE", OperatorType::kSquare); + CheckSimpleOperator("ZEROS_LIKE", + OperatorType::kZerosLike); } TEST_F(OperatorTest, BuiltinAdd) { diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 6ab93d9316..4a1ae35cb5 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -406,6 +406,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(LogicalOr) HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder) HANDLE_OPERATORTYPENAME_CASE(Unpack) + HANDLE_OPERATORTYPENAME_CASE(ZerosLike) default: LOG(FATAL) << "Unhandled op type"; #undef HANDLE_OPERATORTYPENAME_CASE -- GitLab From 0827dcb82b0bf3d8d543cef1d3a17d330f183848 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 01:59:43 -0700 Subject: [PATCH 0250/1357] Add fill to schema. 
PiperOrigin-RevId: 213234759 --- tensorflow/contrib/lite/builtin_ops.h | 1 + .../lite/core/api/flatbuffer_conversions.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 124 +++++++++++++++++- 5 files changed, 126 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7f33942c90..7809d114e2 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -119,6 +119,7 @@ typedef enum { kTfLiteBuiltinReduceAny = 91, kTfLiteBuiltinSquare = 92, kTfLiteBuiltinZerosLike = 93, + kTfLiteBuiltinFill = 94, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index ceb2bbd612..03af538073 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -619,6 +619,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_FLOOR_DIV: case BuiltinOperator_SQUARE: case BuiltinOperator_ZEROS_LIKE: + case BuiltinOperator_FILL: break; } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 3b6a81ffde..a1c7434599 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -674,6 +674,7 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_REDUCE_ANY: case tflite::BuiltinOperator_SQUARE: case tflite::BuiltinOperator_ZEROS_LIKE: + case tflite::BuiltinOperator_FILL: logError("Op code %d is currently not delegated to NNAPI", builtin); return kTfLiteError; break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 4c339317cb..3da3188c3a 100644 --- 
a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -175,6 +175,7 @@ enum BuiltinOperator : byte { REDUCE_ANY = 91, SQUARE = 92, ZEROS_LIKE = 93, + FILL = 94, } // Options for the builtin operators. @@ -246,6 +247,7 @@ union BuiltinOptions { FloorDivOptions, SquareOptions, ZerosLikeOptions, + FillOptions, } enum Padding : byte { SAME, VALID } @@ -593,6 +595,9 @@ table SquareOptions { table ZerosLikeOptions { } +table FillOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 03c227f987..c7a59cabc5 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -232,6 +232,9 @@ struct SquareOptionsT; struct ZerosLikeOptions; struct ZerosLikeOptionsT; +struct FillOptions; +struct FillOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -391,11 +394,12 @@ enum BuiltinOperator { BuiltinOperator_REDUCE_ANY = 91, BuiltinOperator_SQUARE = 92, BuiltinOperator_ZEROS_LIKE = 93, + BuiltinOperator_FILL = 94, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_ZEROS_LIKE + BuiltinOperator_MAX = BuiltinOperator_FILL }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[93] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[94] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -489,7 +493,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[93] { BuiltinOperator_FLOOR_DIV, BuiltinOperator_REDUCE_ANY, BuiltinOperator_SQUARE, - BuiltinOperator_ZEROS_LIKE + BuiltinOperator_ZEROS_LIKE, + BuiltinOperator_FILL }; return values; } @@ -590,6 +595,7 @@ inline const char **EnumNamesBuiltinOperator() { "REDUCE_ANY", "SQUARE", "ZEROS_LIKE", + "FILL", nullptr }; return 
names; @@ -669,11 +675,12 @@ enum BuiltinOptions { BuiltinOptions_FloorDivOptions = 65, BuiltinOptions_SquareOptions = 66, BuiltinOptions_ZerosLikeOptions = 67, + BuiltinOptions_FillOptions = 68, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_ZerosLikeOptions + BuiltinOptions_MAX = BuiltinOptions_FillOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[68] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[69] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -742,7 +749,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[68] { BuiltinOptions_UnpackOptions, BuiltinOptions_FloorDivOptions, BuiltinOptions_SquareOptions, - BuiltinOptions_ZerosLikeOptions + BuiltinOptions_ZerosLikeOptions, + BuiltinOptions_FillOptions }; return values; } @@ -817,6 +825,7 @@ inline const char **EnumNamesBuiltinOptions() { "FloorDivOptions", "SquareOptions", "ZerosLikeOptions", + "FillOptions", nullptr }; return names; @@ -1099,6 +1108,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1666,6 +1679,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_ZerosLikeOptions ? reinterpret_cast(value) : nullptr; } + FillOptionsT *AsFillOptions() { + return type == BuiltinOptions_FillOptions ? + reinterpret_cast(value) : nullptr; + } + const FillOptionsT *AsFillOptions() const { + return type == BuiltinOptions_FillOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -5949,6 +5970,46 @@ inline flatbuffers::Offset CreateZerosLikeOptions( flatbuffers::Offset CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct FillOptionsT : public flatbuffers::NativeTable { + typedef FillOptions TableType; + FillOptionsT() { + } +}; + +struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FillOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + FillOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FillOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + FillOptionsBuilder &operator=(const FillOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFillOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + FillOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -6283,6 +6344,9 @@ struct Operator 
FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { return builtin_options_type() == BuiltinOptions_ZerosLikeOptions ? static_cast(builtin_options()) : nullptr; } + const FillOptions *builtin_options_as_FillOptions() const { + return builtin_options_type() == BuiltinOptions_FillOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -6582,6 +6646,10 @@ template<> inline const ZerosLikeOptions *Operator::builtin_options_as inline const FillOptions *Operator::builtin_options_as() const { + return builtin_options_as_FillOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -8873,6 +8941,29 @@ inline flatbuffers::Offset CreateZerosLikeOptions(flatbuffers: _fbb); } +inline FillOptionsT *FillOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new FillOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void FillOptions::UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset FillOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFillOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FillOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateFillOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); 
UnPackTo(_o, _resolver); @@ -9330,6 +9421,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -9616,6 +9711,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -9890,6 +9989,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(value); + return CreateFillOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -10164,6 +10267,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new ZerosLikeOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_FillOptions: { + value = new FillOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -10506,6 +10613,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From 5ff7f982846bd3f8056c8252a0afeb07e5b3e982 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 17 Sep 2018 02:01:53 -0700 Subject: [PATCH 0251/1357] compat: Update forward compatibility horizon to 2018-09-17 PiperOrigin-RevId: 213234942 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index c246a98237..0d2f2c9b9e 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 16) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 17) @tf_export("compat.forward_compatible") -- GitLab From 2b39e9861acaf06923e4e0802581dd7581609a01 Mon Sep 17 00:00:00 2001 From: tomguluson92 <314913739@qq.com> Date: Mon, 17 Sep 2018 17:12:10 +0800 Subject: [PATCH 0252/1357] revised a parameter error Hi, I found that the first call to the `eval_model` function should pass `interpreter` as its argument, but `interpreter_quant` was written there by mistake. --- tensorflow/contrib/lite/tutorials/post_training_quant.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb index 4929133bda..82abbc1532 100644 --- a/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb +++ b/tensorflow/contrib/lite/tutorials/post_training_quant.ipynb @@ -542,7 +542,7 @@ }, "outputs": [], "source": [ - "print(eval_model(interpreter_quant, mnist_ds))" + "print(eval_model(interpreter, mnist_ds))" ] }, { -- GitLab From fa80a920f2a3bc00522fe95fc9a07a28d67fc055 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Mon, 17 Sep 2018 12:50:18 +0300 Subject: [PATCH 0253/1357] Add 'override' specifier to ReadData, WriteData.
--- tensorflow/contrib/ignite/kernels/ignite_plain_client.h | 4 ++-- tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h index 750ebe605a..d12d56fdc1 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -29,8 +29,8 @@ class PlainClient : public Client { virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, const int32_t length); - virtual Status WriteData(const uint8_t* buf, const int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length) override; + virtual Status WriteData(const uint8_t* buf, const int32_t length) override; private: const string host_; diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h index d59ce91aba..372156a757 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -32,8 +32,8 @@ class SslWrapper : public Client { virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, const int32_t length); - virtual Status WriteData(const uint8_t* buf, const int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length) override; + virtual Status WriteData(const uint8_t* buf, const int32_t length) override; private: Status InitSslContext(); -- GitLab From cac963862be3faa421c559f39033c9bfb3b27a51 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 17 Sep 2018 03:12:38 -0700 Subject: [PATCH 0254/1357] [XLA:TF] Enable int8 and uint8 support in the bridge for CPU/GPU The test changes are awkward. 
None of these are XLA bugs, it's just that the op definitions in tensorflow are really inconsistent. I tried to infer whether the limitation is on signed types, index types or just arbitrary. In the latter case just int8/uint8 is blacklisted, we should probably lift that requirement at some point. PiperOrigin-RevId: 213243906 --- tensorflow/compiler/jit/xla_cpu_device.cc | 5 +++-- tensorflow/compiler/jit/xla_gpu_device.cc | 6 +++--- tensorflow/compiler/tests/argminmax_test.py | 4 ++-- tensorflow/compiler/tests/binary_ops_test.py | 11 ++++++----- tensorflow/compiler/tests/build_defs.bzl | 4 ++-- tensorflow/compiler/tests/random_ops_test.py | 3 ++- .../compiler/tests/reverse_sequence_op_test.py | 2 +- tensorflow/compiler/tests/unary_ops_test.py | 4 ++-- tensorflow/compiler/tests/xla_ops_test.py | 2 +- tensorflow/compiler/tests/xla_test.py | 6 ++++++ tensorflow/compiler/tf2xla/xla_op_registry.h | 18 +++++++++--------- 11 files changed, 37 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc index 7e159e3171..1afc305abe 100644 --- a/tensorflow/compiler/jit/xla_cpu_device.cc +++ b/tensorflow/compiler/jit/xla_cpu_device.cc @@ -65,8 +65,9 @@ REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_CPU, XlaCpuDeviceFactory); // Kernel registrations -constexpr std::array kAllXlaCpuTypes = { - {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; +constexpr std::array kAllXlaCpuTypes = { + {DT_UINT8, DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, + DT_COMPLEX64, DT_BOOL}}; REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_CPU, XlaLocalLaunchOp, kAllXlaCpuTypes); REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_CPU, kAllXlaCpuTypes); diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc index ef4466f005..4cf556524d 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -74,9 +74,9 @@ 
REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_GPU, XlaGpuDeviceFactory); // Kernel registrations -constexpr std::array kAllXlaGpuTypes = { - {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL, - DT_BFLOAT16}}; +constexpr std::array kAllXlaGpuTypes = { + {DT_UINT8, DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, + DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}}; REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_GPU, XlaLocalLaunchOp, kAllXlaGpuTypes); REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_GPU, kAllXlaGpuTypes); diff --git a/tensorflow/compiler/tests/argminmax_test.py b/tensorflow/compiler/tests/argminmax_test.py index 4155342787..68f52e796c 100644 --- a/tensorflow/compiler/tests/argminmax_test.py +++ b/tensorflow/compiler/tests/argminmax_test.py @@ -50,12 +50,12 @@ class ArgMinMaxTest(xla_test.XLATestCase): def testArgMinMax(self): # Complex numbers do not support argmin/argmax. - minmax_types = set(self.numeric_types) - set(self.complex_types) + minmax_types = self.all_types & {np.int32, np.int64} for dtype in minmax_types: # output_type is a numpy data type that is used to specify the desired # output type of the op as well as to convert the Python number to the # array scalar of the type. 
- for output_type in self.int_types: + for output_type in minmax_types: self._assertOpOutputMatchesExpected( math_ops.argmax, axis=0, diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 17280e445b..900e84ab58 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -210,7 +210,7 @@ class BinaryOpsTest(xla_test.XLATestCase): equality_test=self.ListsAreClose) def testIntOps(self): - for dtype in self.int_types: + for dtype in self.signed_int_types: self._testBinary( gen_math_ops.truncate_div, np.array([3, 3, -1, -9, -8], dtype=dtype), @@ -287,7 +287,8 @@ class BinaryOpsTest(xla_test.XLATestCase): dtype(7), expected=np.array([[-6], [-5]], dtype=dtype)) - if dtype not in self.complex_types: # min/max not supported for complex + # min/max not supported for complex + if dtype not in self.complex_types | {np.uint8, np.int8}: self._testBinary( math_ops.maximum, np.array([1, 2], dtype=dtype), @@ -337,7 +338,7 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([[70], [14]], dtype=dtype)) # Complex support for squared_difference is incidental, see b/68205550 - if dtype not in self.complex_types: + if dtype not in self.complex_types | {np.uint8, np.int8}: self._testBinary( math_ops.squared_difference, np.array([1, 2], dtype=dtype), @@ -567,7 +568,7 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([1, -2, -1, -5, 2], dtype=dtype)) def testIntDivision(self): - for dtype in self.int_types: + for dtype in self.signed_int_types: self._testDivision(dtype) def testFloatDivision(self): @@ -588,7 +589,7 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([1, 1, -1, 0], dtype=dtype)) def testIntRemainder(self): - for dtype in self.int_types: + for dtype in self.signed_int_types - {np.int8}: self._testRemainder(dtype) def testFloatRemainder(self): diff --git a/tensorflow/compiler/tests/build_defs.bzl 
b/tensorflow/compiler/tests/build_defs.bzl index a76f136736..114793352e 100644 --- a/tensorflow/compiler/tests/build_defs.bzl +++ b/tensorflow/compiler/tests/build_defs.bzl @@ -58,12 +58,12 @@ def tf_xla_py_test( if backend == "cpu": backend_args += [ "--test_device=XLA_CPU", - "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64", + "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64", ] elif backend == "gpu": backend_args += [ "--test_device=XLA_GPU", - "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16", + "--types=DT_HALF,DT_FLOAT,DT_DOUBLE,DT_UINT8,DT_INT8,DT_INT32,DT_INT64,DT_BOOL,DT_COMPLEX64,DT_BFLOAT16", ] backend_tags += ["requires-gpu-sm35"] elif backend in plugins: diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py index 6e18344117..41fe42a26b 100644 --- a/tensorflow/compiler/tests/random_ops_test.py +++ b/tensorflow/compiler/tests/random_ops_test.py @@ -35,7 +35,8 @@ class RandomOpsTest(xla_test.XLATestCase): """Test cases for random-number generating operators.""" def _random_types(self): - return set(self.numeric_types) - set(self.complex_types) + return set(self.numeric_types) - set( + self.complex_types) - {np.uint8, np.int8} def _testRngIsNotConstant(self, rng, dtype): # Tests that 'rng' does not always return the same value. 
diff --git a/tensorflow/compiler/tests/reverse_sequence_op_test.py b/tensorflow/compiler/tests/reverse_sequence_op_test.py index 60c2337743..abc822ef36 100644 --- a/tensorflow/compiler/tests/reverse_sequence_op_test.py +++ b/tensorflow/compiler/tests/reverse_sequence_op_test.py @@ -85,7 +85,7 @@ class ReverseSequenceTest(xla_test.XLATestCase): def testSeqLength(self): for dtype in self.all_types: - for seq_dtype in self.int_types: + for seq_dtype in self.all_types & {np.int32, np.int64}: self._testBasic(dtype, seq_dtype) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 5b0e57f83f..04ea004fe7 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -84,7 +84,7 @@ class UnaryOpsTest(xla_test.XLATestCase): self.assertAllClose(result[i], expected[i], rtol, atol) def testAllTypeOps(self): - for dtype in self.numeric_types: + for dtype in self.numeric_types - {np.int8, np.uint8}: self._assertOpOutputMatchesExpected( array_ops.diag, np.array([1, 2, 3, 4], dtype=dtype), np.array( @@ -633,7 +633,7 @@ class UnaryOpsTest(xla_test.XLATestCase): expected=np.array([-1, 0, -2, -17, -43], dtype=dtype)) def testNumericOps(self): - for dtype in self.numeric_types: + for dtype in self.numeric_types - {np.int8, np.uint8}: self._assertOpOutputMatchesExpected( math_ops.abs, np.array([[2, -1]], dtype=dtype), diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py index 1e600c44e9..4cf88fc523 100644 --- a/tensorflow/compiler/tests/xla_ops_test.py +++ b/tensorflow/compiler/tests/xla_ops_test.py @@ -181,7 +181,7 @@ class XlaOpsTest(xla_test.XLATestCase, parameterized.TestCase): dtype=dtype)) def testNeg(self): - for dtype in self.numeric_types: + for dtype in self.numeric_types - {np.uint8, np.int8}: self._assertOpOutputMatchesExpected( xla.neg, args=(np.array([1, 2, 3], dtype=dtype),), diff --git a/tensorflow/compiler/tests/xla_test.py 
b/tensorflow/compiler/tests/xla_test.py index 88827cb53b..df5c81243a 100644 --- a/tensorflow/compiler/tests/xla_test.py +++ b/tensorflow/compiler/tests/xla_test.py @@ -101,6 +101,12 @@ class XLATestCase(test.TestCase): self._all_types = set( [dtype.as_numpy_dtype for dtype in self._all_tf_types]) self._int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types]) + self.signed_int_types = set(dtype.as_numpy_dtype + for dtype in self.int_tf_types + if not dtype.is_unsigned) + self.unsigned_int_types = set(dtype.as_numpy_dtype + for dtype in self.int_tf_types + if dtype.is_unsigned) self._float_types = set( [dtype.as_numpy_dtype for dtype in self._float_tf_types]) self.complex_types = set([ diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h index 74a4885f1f..34e22a4510 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.h +++ b/tensorflow/compiler/tf2xla/xla_op_registry.h @@ -47,17 +47,17 @@ extern const char* const DEVICE_XLA_GPU; constexpr std::array kFloatTypes = { {DT_HALF, DT_FLOAT, DT_DOUBLE, DT_BFLOAT16}}; -constexpr std::array kNumericTypes = { - {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, - DT_COMPLEX64, DT_BFLOAT16}}; +constexpr std::array kNumericTypes = { + {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF, + DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BFLOAT16}}; -constexpr std::array kCpuAllTypes = { - {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, - DT_COMPLEX64, DT_BOOL}}; +constexpr std::array kCpuAllTypes = { + {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF, + DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; -constexpr std::array kGpuAllTypes = { - {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, - DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}}; +constexpr std::array kGpuAllTypes = { + {DT_UINT8, DT_UINT32, DT_UINT64, DT_INT8, DT_INT32, DT_INT64, DT_HALF, + DT_FLOAT, DT_DOUBLE, 
DT_COMPLEX64, DT_BOOL, DT_BFLOAT16}}; // Class that manages registrations of operators and devices for the XLA JIT. // Not thread-safe. -- GitLab From 055e5a0f71c83bab3f645d1c2e2cadeff5ff654f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 06:29:16 -0700 Subject: [PATCH 0255/1357] README s/tensorflow.contrib/tensorflow.python/. PiperOrigin-RevId: 213262445 --- tensorflow/python/autograph/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/README.md b/tensorflow/python/autograph/README.md index cc54da4daa..bfe21b4765 100644 --- a/tensorflow/python/autograph/README.md +++ b/tensorflow/python/autograph/README.md @@ -65,7 +65,7 @@ pip install -U tf-nightly Then import the `autograph` module from `tf.contrib`: ``` -from tensorflow.contrib import autograph as ag +from tensorflow.python import autograph as ag ``` ### Related links -- GitLab From e0d6830999a6e7c92f047e6e89c3aba20911cc8c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 08:21:43 -0700 Subject: [PATCH 0256/1357] Convert more kernel signatures to use runtime shapes. 
PiperOrigin-RevId: 213275003 --- .../internal/optimized/depthwiseconv_float.h | 109 +++++++++++----- .../internal/optimized/depthwiseconv_uint8.h | 123 ++++++++++++------ .../depthwiseconv_uint8_3x3_filter.h | 66 ++++++---- .../contrib/lite/kernels/internal/types.h | 2 + 4 files changed, 206 insertions(+), 94 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index 70810ca784..f2d1319801 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -907,25 +907,40 @@ inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth, } } -inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { +inline void DepthwiseConv( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data) { gemmlowp::ScopedProfilingLabel label("DepthwiseConv"); - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int input_depth = ArraySize(input_dims, 0); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = 
ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); - TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + // TODO(suharshs): Optimized implementation of dilation depthwise conv need to + // be implemented. + TFLITE_DCHECK_EQ(params.dilation_width_factor, 1); + TFLITE_DCHECK_EQ(params.dilation_height_factor, 1); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); static const int kAccBufferMaxSize = 2048; float acc_buffer[kAccBufferMaxSize]; @@ -990,6 +1005,10 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, row_accum_func = FloatDepthwiseConvAccumRowGeneric; } + const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2); + const int input_batch_stride = input_height_stride * input_shape.Dims(1); + const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2); + // Now 
that we have determined row_accum_func, we can start work. float* output_ptr = output_data; for (int b = 0; b < batches; ++b) { @@ -1014,13 +1033,12 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { const int in_y = in_y_origin + filter_y; - row_accum_func(stride_width, input_depth, input_width, - input_data + in_y * input_dims.strides[2] + - b * input_dims.strides[3], - pad_width, depth_multiplier, filter_width, - filter_data + filter_y * filter_dims.strides[2], - out_x_buffer_start, out_x_buffer_end, output_depth, - acc_buffer); + row_accum_func( + stride_width, input_depth, input_width, + input_data + in_y * input_height_stride + b * input_batch_stride, + pad_width, depth_multiplier, filter_width, + filter_data + filter_y * filter_height_stride, out_x_buffer_start, + out_x_buffer_end, output_depth, acc_buffer); } // Finished accumulating. Now store to destination. const int num_output_values = output_depth * num_output_pixels; @@ -1067,6 +1085,8 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, const float* bias_data, const Dims<4>& bias_dims, @@ -1078,15 +1098,43 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, const Dims<4>& output_dims) { // TODO(suharshs): Optimized implementation of dilation depthwise conv need to // be implemented. - TFLITE_DCHECK(dilation_width_factor == 1); - TFLITE_DCHECK(dilation_height_factor == 1); + TFLITE_DCHECK_EQ(dilation_width_factor, 1); + TFLITE_DCHECK_EQ(dilation_height_factor, 1); + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride_width, stride_height, pad_width, pad_height, - depth_multiplier, output_activation_min, output_activation_max, - output_data, output_dims); + bias_dims, stride_width, stride_height, 1, 1, pad_width, + pad_height, depth_multiplier, output_activation_min, + output_activation_max, output_data, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // legacy, for compatibility with old checked-in code template void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, @@ -1103,6 +1151,7 @@ void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, output_data, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. 
// legacy, for compatibility with old checked-in code template void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index f707279600..ccb9d1654f 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1669,33 +1669,50 @@ inline void DepthwiseConvInitAccBuffer(int num_output_pixels, int output_depth, } } -inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { +inline void DepthwiseConv( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + uint8* output_data) { gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit"); - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; + const int32 input_offset = params.input_offset; + const int32 
filter_offset = params.weights_offset; + const int32 output_offset = params.output_offset; + const int32 output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + // TODO(suharshs): Optimized implementation of dilation depthwise conv need to + // be implemented. + TFLITE_DCHECK_EQ(params.dilation_width_factor, 1); + TFLITE_DCHECK_EQ(params.dilation_height_factor, 1); - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int input_depth = ArraySize(input_dims, 0); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); #ifdef USE_NEON const bool shift_left = (output_shift <= 0); const int32 multiplier_power_of_two = shift_left ? 
(1 << -output_shift) : 1; #endif - TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. @@ -1703,14 +1720,11 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. if (Fast3x3FilterKernelSupported( - input_dims, filter_dims, stride_width, stride_height, pad_width, - pad_height, depth_multiplier, output_dims, output_shift)) { - DepthwiseConv3x3Filter(input_data, input_dims, input_offset, filter_data, - filter_dims, filter_offset, bias_data, bias_dims, - stride_width, stride_height, pad_width, pad_height, - depth_multiplier, output_offset, output_multiplier, - output_shift, output_activation_min, - output_activation_max, output_data, output_dims); + input_shape, filter_shape, stride_width, stride_height, pad_width, + pad_height, depth_multiplier, output_shape, output_shift)) { + DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape, + filter_data, bias_shape, bias_data, output_shape, + output_data); return; } #endif @@ -1785,6 +1799,10 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, #undef TFMINI_USE_DEPTHWISECONV_KERNEL + const int input_height_stride = input_shape.Dims(3) * input_shape.Dims(2); + const int input_batch_stride = input_height_stride * input_shape.Dims(1); + const int filter_height_stride = filter_shape.Dims(3) * filter_shape.Dims(2); + // Now that we have determined row_accum_func, we can start work. 
uint8* output_ptr = output_data; for (int b = 0; b < batches; ++b) { @@ -1811,10 +1829,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, const int in_y = in_y_origin + filter_y; row_accum_func( stride_width, input_depth, input_width, - input_data + in_y * input_dims.strides[2] + - b * input_dims.strides[3], + input_data + in_y * input_height_stride + b * input_batch_stride, input_offset, pad_width, depth_multiplier, filter_width, - filter_data + filter_y * filter_dims.strides[2], filter_offset, + filter_data + filter_y * filter_height_stride, filter_offset, out_x_buffer_start, out_x_buffer_end, output_depth, acc_buffer); } // Finished accumulating int32 values. Now need to convert them to @@ -1964,6 +1981,8 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, @@ -1975,19 +1994,48 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, int output_shift, int32 output_activation_min, int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims) { - // TODO(suharshs): Optimized implementation of dilation depthwise is not - // supported yet. - TFLITE_DCHECK(dilation_width_factor == 1); - TFLITE_DCHECK(dilation_height_factor == 1); + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, filter_offset, bias_data, bias_dims, stride_width, - stride_height, pad_width, pad_height, depth_multiplier, + stride_height, 1, 1, pad_width, pad_height, depth_multiplier, output_offset, output_multiplier, output_shift, output_activation_min, output_activation_max, output_data, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy, for compatibility with old checked-in code. 
template void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, @@ -2011,6 +2059,7 @@ void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy, for compatibility with old checked-in code. template void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 0ce64f8c70..9fed53cafb 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -3175,16 +3175,17 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, } inline bool Fast3x3FilterKernelSupported( - const Dims<4>& input_dims, const Dims<4>& filter_dims, int32 stride_width, - int32 stride_height, int32 pad_width, int32 pad_height, - int32 depth_multiplier, const Dims<4>& output_dims, int32 output_shift) { - const int32 input_height = ArraySize(input_dims, 2); - const int32 input_width = ArraySize(input_dims, 1); - const int32 input_depth = ArraySize(input_dims, 0); - const int32 filter_height = ArraySize(filter_dims, 2); - const int32 filter_width = ArraySize(filter_dims, 1); - const int32 output_height = ArraySize(output_dims, 2); - const int32 output_width = ArraySize(output_dims, 1); + const RuntimeShape& input_shape, const RuntimeShape& filter_shape, + int32 stride_width, int32 stride_height, int32 pad_width, int32 pad_height, + int32 depth_multiplier, const RuntimeShape& output_shape, + int32 output_shift) { + const int32 input_height = input_shape.Dims(1); + const int32 input_width = input_shape.Dims(2); + const int32 input_depth = input_shape.Dims(3); + const int32 filter_height = filter_shape.Dims(1); + const int32 filter_width = 
filter_shape.Dims(2); + const int32 output_height = output_shape.Dims(1); + const int32 output_width = output_shape.Dims(2); bool supported = filter_width == 3 && filter_height == 3 && depth_multiplier == 1 && @@ -3234,26 +3235,37 @@ inline bool Fast3x3FilterKernelSupported( } inline void DepthwiseConv3x3Filter( - const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, - const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, int32 stride_width, - int32 stride_height, int32 pad_width, int32 pad_height, - int32 depth_multiplier, int32 output_offset, int32 output_multiplier, - int32 output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { + const DepthwiseParams& rt_params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + uint8* output_data) { gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__); DepthwiseConvParams params; - params.input_depth = ArraySize(input_dims, 0); - params.input_width = ArraySize(input_dims, 1); - params.input_height = ArraySize(input_dims, 2); + + const int32 stride_width = rt_params.stride_width; + const int32 stride_height = rt_params.stride_height; + const int32 pad_width = rt_params.padding_values.width; + const int32 pad_height = rt_params.padding_values.height; + const int32 depth_multiplier = rt_params.depth_multiplier; + const int32 output_activation_min = rt_params.quantized_activation_min; + const int32 output_activation_max = rt_params.quantized_activation_max; + const int32 input_offset = rt_params.input_offset; + const int32 filter_offset = rt_params.weights_offset; + const int32 output_offset = rt_params.output_offset; + const int32 output_multiplier = rt_params.output_multiplier; + const int32 
output_shift = rt_params.output_shift; + + params.input_depth = input_shape.Dims(3); + params.input_width = input_shape.Dims(2); + params.input_height = input_shape.Dims(1); params.input_row_size = params.input_depth * params.input_width; params.input_offset = input_offset; params.stride_width = stride_width; params.stride_height = stride_height; - params.output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); - params.output_width = ArraySize(output_dims, 1); - params.output_height = ArraySize(output_dims, 2); + params.output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + params.output_width = output_shape.Dims(2); + params.output_height = output_shape.Dims(1); params.output_row_size = params.output_depth * params.output_width; params.output_offset = output_offset; params.filter_offset = filter_offset; @@ -3262,8 +3274,8 @@ inline void DepthwiseConv3x3Filter( params.output_activation_min = output_activation_min; params.output_activation_max = output_activation_max; - const int32 filter_height = ArraySize(filter_dims, 2); - const int32 filter_width = ArraySize(filter_dims, 1); + const int32 filter_height = filter_shape.Dims(1); + const int32 filter_width = filter_shape.Dims(2); params.filter_row_size = params.output_depth * filter_width; // Algorithm assumes below constraints. 
It is optimized for depth @@ -3279,7 +3291,7 @@ inline void DepthwiseConv3x3Filter( TFLITE_DCHECK(pad_width == 0 || pad_width == 1); TFLITE_DCHECK(pad_width == pad_height); - const int32 batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int32 batches = MatchingDim(input_shape, 0, output_shape, 0); const int64_t input_batch_size = params.input_row_size * params.input_height; const int64_t output_batch_size = params.output_row_size * params.output_height; diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index f6636acc58..ac4626bc30 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -772,6 +772,8 @@ struct DepthwiseParams { PaddingValues padding_values; int16 stride_width; int16 stride_height; + int16 dilation_width_factor; + int16 dilation_height_factor; int16 depth_multiplier; // uint8 inference params. // TODO(b/65838351): Use smaller types if appropriate. -- GitLab From 0f4861d3a75744353cc6885987c0ec919102b2cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 09:08:49 -0700 Subject: [PATCH 0257/1357] Convert more kernel signatures to use runtime shapes. PiperOrigin-RevId: 213281730 --- .../internal/reference/depthwiseconv_float.h | 90 +++++++++++---- .../internal/reference/depthwiseconv_uint8.h | 107 +++++++++++++----- 2 files changed, 148 insertions(+), 49 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h index bb5d590775..a8428528c9 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h @@ -22,25 +22,36 @@ limitations under the License. 
namespace tflite { namespace reference_ops { -inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, int depth_multiplier, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int input_depth = ArraySize(input_dims, 0); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); - TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); +inline void DepthwiseConv( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + 
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); for (int b = 0; b < batches; ++b) { for (int out_y = 0; out_y < output_height; ++out_y) { @@ -61,18 +72,18 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { float input_value = - input_data[Offset(input_dims, ic, in_x, in_y, b)]; + input_data[Offset(input_shape, b, in_y, in_x, ic)]; float filter_value = filter_data[Offset( - filter_dims, oc, filter_x, filter_y, 0)]; + filter_shape, 0, filter_y, filter_x, oc)]; total += (input_value * filter_value); } } } float bias_value = 0.0f; if (bias_data) { - bias_value = bias_data[Offset(bias_dims, oc, 0, 0, 0)]; + bias_value = bias_data[oc]; } - output_data[Offset(output_dims, oc, out_x, out_y, b)] = + output_data[Offset(output_shape, b, out_y, out_x, oc)] = ActivationFunctionWithMinMax(total + bias_value, output_activation_min, output_activation_max); @@ -83,6 +94,37 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, + int dilation_width_factor, int dilation_height_factor, + int pad_width, int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, const float* bias_data, const Dims<4>& bias_dims, @@ -97,6 +139,7 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, output_activation_max, output_data, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy, for compatibility with old checked-in code. 
template void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, @@ -113,6 +156,7 @@ void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, output_data, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy, for compatibility with old checked-in code. template void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h index 5e3e8997fc..38aea14c21 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -26,27 +26,43 @@ limitations under the License. namespace tflite { namespace reference_ops { -inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int input_depth = ArraySize(input_dims, 0); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); - TFLITE_DCHECK(output_depth == input_depth * 
depth_multiplier); +inline void DepthwiseConv( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + uint8* output_data) { + gemmlowp::ScopedProfilingLabel label("DepthwiseConv/8bit"); + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int depth_multiplier = params.depth_multiplier; + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; + const int32 input_offset = params.input_offset; + const int32 filter_offset = params.weights_offset; + const int32 output_offset = params.output_offset; + const int32 output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); + 
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); for (int b = 0; b < batches; ++b) { for (int out_y = 0; out_y < output_height; ++out_y) { @@ -67,23 +83,23 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { int32 input_val = - input_data[Offset(input_dims, ic, in_x, in_y, b)]; - int32 filter_val = filter_data[Offset(filter_dims, oc, - filter_x, filter_y, 0)]; + input_data[Offset(input_shape, b, in_y, in_x, ic)]; + int32 filter_val = filter_data[Offset( + filter_shape, 0, filter_y, filter_x, oc)]; acc += (filter_val + filter_offset) * (input_val + input_offset); } } } if (bias_data) { - acc += bias_data[Offset(bias_dims, oc, 0, 0, 0)]; + acc += bias_data[oc]; } acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, -output_shift); acc += output_offset; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); - output_data[Offset(output_dims, oc, out_x, out_y, b)] = + output_data[Offset(output_shape, b, out_y, out_x, oc)] = static_cast(acc); } } @@ -92,6 +108,43 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, + int dilation_width_factor, int dilation_height_factor, + int pad_width, int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, @@ -110,6 +163,7 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy, for compatibility with old checked-in code. template void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, @@ -133,6 +187,7 @@ void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, output_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // Legacy, for compatibility with old checked-in code. template void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, -- GitLab From 07bc3696135483612c727ca7687342922ff0d5de Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 17 Sep 2018 09:13:45 -0700 Subject: [PATCH 0258/1357] Removing unused code comment in AutoGraph error rewriting. PiperOrigin-RevId: 213282302 --- tensorflow/python/autograph/core/errors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/autograph/core/errors.py b/tensorflow/python/autograph/core/errors.py index 0750353423..23f8c5b52b 100644 --- a/tensorflow/python/autograph/core/errors.py +++ b/tensorflow/python/autograph/core/errors.py @@ -208,7 +208,6 @@ def rewrite_tf_runtime_error(error, source_map): """ try: cleaned_traceback = _cut_traceback_loops(source_map, error.op.traceback) - # cleaned_traceback = error.op.traceback cleaned_traceback = _rewrite_tb(source_map, cleaned_traceback) op_name = error.op.name -- GitLab From c8a0dfc741736a59f8fd1776b71f38619d66da56 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 17 Sep 2018 09:21:14 -0700 Subject: [PATCH 0259/1357] [tf.data] Adding support for `tf.data.AUTOTUNE` as a special value for the `num_parallel_calls` argument of `tf.data.Dataset.map()`, `tf.data.Dataset.interleave()`, and `tf.contrib.data.map_and_batch()`. When `tf.data.AUTOTUNE` is specified, the level of parallelism is determined at runtime. The underlying mechanism instruments the input pipeline to build a performance model and then uses the model to find the optimal values for the parallelism knobs. 
PiperOrigin-RevId: 213283297 --- .../optimization/model_dataset_op_test.py | 17 +- .../makefile/proto_text_pb_cc_files.txt | 1 - .../makefile/proto_text_pb_h_files.txt | 1 - .../contrib/makefile/tf_pb_text_files.txt | 1 - .../contrib/makefile/tf_proto_files.txt | 1 - tensorflow/core/BUILD | 2 - tensorflow/core/framework/dataset.cc | 1 - tensorflow/core/framework/dataset.h | 31 ++- tensorflow/core/framework/model.cc | 251 ++++++++---------- tensorflow/core/framework/model.h | 97 +++---- tensorflow/core/framework/model.proto | 30 --- .../core/kernels/data/batch_dataset_op.cc | 2 +- .../kernels/data/map_and_batch_dataset_op.cc | 42 ++- .../core/kernels/data/model_dataset_op.cc | 25 +- .../kernels/data/padded_batch_dataset_op.cc | 2 +- .../data/parallel_interleave_dataset_op.cc | 31 ++- .../kernels/data/parallel_map_dataset_op.cc | 2 +- .../kernels/data/parallel_map_iterator.cc | 35 ++- 18 files changed, 299 insertions(+), 273 deletions(-) delete mode 100644 tensorflow/core/framework/model.proto diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py index 0a87d3e905..2b3ac85924 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py @@ -58,7 +58,8 @@ class ModelDatasetTest(test.TestCase): dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))).repeat() - dataset = dataset.map(math_ops.matmul, num_parallel_calls=56) + dataset = dataset.map( + math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE) iterator = dataset.apply(optimization.model()).make_one_shot_iterator() get_next = iterator.get_next() @@ -84,7 +85,9 @@ class ModelDatasetTest(test.TestCase): 1))).repeat() dataset = dataset.apply( batching.map_and_batch( - math_ops.matmul, num_parallel_calls=28, batch_size=batch_size)) 
+ math_ops.matmul, + num_parallel_calls=optimization.AUTOTUNE, + batch_size=batch_size)) iterator = dataset.apply(optimization.model()).make_one_shot_iterator() get_next = iterator.get_next() @@ -109,7 +112,9 @@ class ModelDatasetTest(test.TestCase): 1))).repeat() dataset = dataset.map(math_ops.matmul) dataset = dataset_ops.Dataset.range(1).repeat().interleave( - lambda _: dataset, cycle_length=56, num_parallel_calls=56) + lambda _: dataset, + cycle_length=10, + num_parallel_calls=optimization.AUTOTUNE) iterator = dataset.apply(optimization.model()).make_one_shot_iterator() get_next = iterator.get_next() @@ -146,15 +151,15 @@ class ModelDatasetTest(test.TestCase): x, y = c return a, b, math_ops.matmul(x, y) - dataset = dataset.map(f1, num_parallel_calls=32) + dataset = dataset.map(f1, num_parallel_calls=optimization.AUTOTUNE) dataset = dataset_ops.Dataset.range(1).repeat().interleave( lambda _: dataset, cycle_length=2) - dataset = dataset.map(f2, num_parallel_calls=16) + dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE) dataset = dataset_ops.Dataset.range(1).repeat().interleave( lambda _: dataset, cycle_length=2) - dataset = dataset.map(f3, num_parallel_calls=10) + dataset = dataset.map(f3, num_parallel_calls=optimization.AUTOTUNE) iterator = dataset.apply(optimization.model()).make_one_shot_iterator() get_next = iterator.get_next() diff --git a/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt b/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt index 1d6d9a60e5..0d8df93d11 100644 --- a/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt +++ b/tensorflow/contrib/makefile/proto_text_pb_cc_files.txt @@ -10,7 +10,6 @@ tensorflow/core/framework/graph.pb.cc tensorflow/core/framework/graph_transfer_info.pb.cc tensorflow/core/framework/kernel_def.pb.cc tensorflow/core/framework/log_memory.pb.cc -tensorflow/core/framework/model.pb.cc tensorflow/core/framework/node_def.pb.cc tensorflow/core/framework/op_def.pb.cc 
tensorflow/core/framework/remote_fused_graph_execute_info.pb.cc diff --git a/tensorflow/contrib/makefile/proto_text_pb_h_files.txt b/tensorflow/contrib/makefile/proto_text_pb_h_files.txt index 884461ecae..d982df9319 100644 --- a/tensorflow/contrib/makefile/proto_text_pb_h_files.txt +++ b/tensorflow/contrib/makefile/proto_text_pb_h_files.txt @@ -10,7 +10,6 @@ tensorflow/core/framework/graph.pb.h tensorflow/core/framework/graph_transfer_info.pb.h tensorflow/core/framework/kernel_def.pb.h tensorflow/core/framework/log_memory.pb.h -tensorflow/core/framework/model.pb.h tensorflow/core/framework/node_def.pb.h tensorflow/core/framework/op_def.pb.h tensorflow/core/framework/remote_fused_graph_execute_info.pb.h diff --git a/tensorflow/contrib/makefile/tf_pb_text_files.txt b/tensorflow/contrib/makefile/tf_pb_text_files.txt index e23f499214..f94d70db90 100644 --- a/tensorflow/contrib/makefile/tf_pb_text_files.txt +++ b/tensorflow/contrib/makefile/tf_pb_text_files.txt @@ -10,7 +10,6 @@ tensorflow/core/framework/graph.pb_text.cc tensorflow/core/framework/graph_transfer_info.pb_text.cc tensorflow/core/framework/kernel_def.pb_text.cc tensorflow/core/framework/log_memory.pb_text.cc -tensorflow/core/framework/model.pb_text.cc tensorflow/core/framework/node_def.pb_text.cc tensorflow/core/framework/op_def.pb_text.cc tensorflow/core/framework/remote_fused_graph_execute_info.pb_text.cc diff --git a/tensorflow/contrib/makefile/tf_proto_files.txt b/tensorflow/contrib/makefile/tf_proto_files.txt index 5eae845d9b..8bec3e3e01 100644 --- a/tensorflow/contrib/makefile/tf_proto_files.txt +++ b/tensorflow/contrib/makefile/tf_proto_files.txt @@ -14,7 +14,6 @@ tensorflow/core/framework/graph.proto tensorflow/core/framework/graph_transfer_info.proto tensorflow/core/framework/kernel_def.proto tensorflow/core/framework/log_memory.proto -tensorflow/core/framework/model.proto tensorflow/core/framework/node_def.proto tensorflow/core/framework/op_def.proto tensorflow/core/framework/reader_base.proto 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 55715bb3a6..4074232c93 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -178,7 +178,6 @@ COMMON_PROTO_SRCS = [ "framework/iterator.proto", "framework/kernel_def.proto", "framework/log_memory.proto", - "framework/model.proto", "framework/node_def.proto", "framework/op_def.proto", "framework/reader_base.proto", @@ -842,7 +841,6 @@ tf_cuda_library( "framework/log_memory.h", "framework/lookup_interface.h", "framework/memory_types.h", - "framework/model.h", "framework/node_def_builder.h", "framework/node_def_util.h", "framework/numeric_op.h", diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index 5281c56f04..284dafb886 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -20,7 +20,6 @@ limitations under the License. namespace tensorflow { namespace data { - namespace { // A wrapper class for storing a `DatasetBase` instance in a DT_VARIANT tensor. diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 4ee6749eea..91b1e61d3c 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -47,6 +47,8 @@ class GraphDefBuilder; class Node; namespace data { +// A constant that can be used to enable auto-tuning. +constexpr int kAutoTune = -1; class DatasetBase; class SerializationContext; @@ -670,13 +672,34 @@ class DatasetBaseIterator : public IteratorBase { return strings::StrCat(params_.prefix, ":", name); } - // When performance modeling is enabled, this method sets metadata entry for - // the model node corresponding to this iterator. - void SetMetadata(IteratorContext* ctx, const string& key, int64 value) { + // When performance modeling is enabled, this method adds a constant parameter + // to the model node corresponding to this iterator. 
+ void AddConstantParameter(IteratorContext* ctx, const string& name, + int64 value) { if (ctx->model()) { std::shared_ptr node = ctx->model()->LookupNode(prefix()); if (node) { - node->set_metadata(key, value); + node->add_constant_param(name, value); + } + } + } + + // When performance modeling is enabled, this method adds a tunable parameter + // to the model node corresponding to this iterator. + // + // The `set_fn` function should set the tunable parameter to the value of + // its input argument. The function should be thread-safe; in particular, the + // state it updates should be protected by a lock as the function can be + // invoked asynchronously. It is guaranteed that this function will not be + // invoked after the iterator is deleted because the model node that owns + // the function is deleted when the iterator is deleted. + void AddTunableParameter(IteratorContext* ctx, const string& name, + int64 value, int64 min, int64 max, + std::function&& set_fn) { + if (ctx->model()) { + std::shared_ptr node = ctx->model()->LookupNode(prefix()); + if (node) { + node->add_tunable_param(name, value, min, max, std::move(set_fn)); } } } diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 250b006641..b3fe357ea1 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -15,52 +15,28 @@ limitations under the License. #include "tensorflow/core/framework/model.h" +#include + +#include "tensorflow/core/lib/gtl/map_util.h" + namespace tensorflow { namespace data { namespace model { // TODO(jsimsa): Use `Node` subclassing instead of types and node statements. 
-void Node::CollectKnobs(std::vector* knobs) { +void Node::CollectTunables( + std::vector>* tunables) { mutex_lock l(mu_); + for (auto input : inputs_) { + input->CollectTunables(tunables); + } switch (type_) { - case Type::PARALLEL_INTERLEAVE_V2: { - for (auto input : inputs_) { - input->CollectKnobs(knobs); - } - int64 processing_time = static_cast( - static_cast(ProcessingTimeLocked() - - inputs_.front()->ProcessingTime()) / - static_cast(inputs_.size() - 1)); - knobs->emplace_back( - Node::Knob{this, processing_time, metadata_["parallelism"]}); - return; - } case Type::MAP_AND_BATCH: + case Type::PARALLEL_INTERLEAVE_V2: case Type::PARALLEL_MAP: { - for (auto input : inputs_) { - input->CollectKnobs(knobs); - } - knobs->emplace_back( - Node::Knob{this, NanosPerElementLocked(), metadata_["parallelism"]}); - return; - } - case Type::BATCH: - case Type::CACHE: - case Type::CONCATENATE: - case Type::FILTER: - case Type::FLAT_MAP: - case Type::INTERLEAVE: - case Type::MAP: - case Type::PADDED_BATCH: - case Type::PARALLEL_INTERLEAVE: - case Type::PREFETCH: - case Type::REPEAT: - case Type::SHUFFLE: - case Type::SKIP: - case Type::TAKE: - case Type::ZIP: { - for (auto input : inputs_) { - input->CollectKnobs(knobs); + if (auto* tunable_param = + gtl::FindOrNull(tunable_params_, "parallelism")) { + tunables->push_back(*tunable_param); } return; } @@ -69,12 +45,19 @@ void Node::CollectKnobs(std::vector* knobs) { } } +int64 Node::GetParameterValue(const string& name) { + if (auto* tunable_param = gtl::FindOrNull(tunable_params_, name)) { + return (*tunable_param)->value; + } + return constant_params_[name]; +} + int64 Node::ProcessingTimeLocked() { switch (type_) { case Type::BATCH: case Type::MAP_AND_BATCH: case Type::PADDED_BATCH: { - int64 batch_size = metadata_["batch_size"]; + int64 batch_size = GetParameterValue("batch_size"); return NanosPerElementLocked() + batch_size * ProcessingTimeForInputs(); } case Type::FILTER: { @@ -122,7 +105,7 @@ int64 
Node::OutputTimeLocked(std::vector* input_times) { switch (type_) { case Type::BATCH: case Type::PADDED_BATCH: { - double batch_size = metadata_["batch_size"]; + double batch_size = GetParameterValue("batch_size"); int64 old_value = (*input_times)[input_times->size() - 1]; (*input_times)[input_times->size() - 1] = static_cast( static_cast(old_value + NanosPerElementLocked()) / @@ -168,8 +151,8 @@ int64 Node::OutputTimeLocked(std::vector* input_times) { static_cast(inputs_.size() - 1); } case Type::MAP_AND_BATCH: { - double batch_size = metadata_["batch_size"]; - double parallelism = metadata_["parallelism"]; + double batch_size = GetParameterValue("batch_size"); + double parallelism = GetParameterValue("parallelism"); int64 delta = static_cast(static_cast(NanosPerElementLocked()) / (batch_size * parallelism)); @@ -182,22 +165,41 @@ int64 Node::OutputTimeLocked(std::vector* input_times) { return std::max(0LL, output_time - input_times->at(input_times->size() - 2)); } - case Type::PARALLEL_INTERLEAVE: + case Type::PARALLEL_INTERLEAVE: { + // TODO(jsimsa): model the first input + if (inputs_.size() <= 1) { + return NanosPerElementLocked(); + } + int64 delta = static_cast(NanosPerElementLocked()) * + static_cast(inputs_.size() - 1); + input_times->push_back(delta); + auto cleanup = + gtl::MakeCleanup([input_times]() { input_times->pop_back(); }); + int64 inputs_output_time = OutputTimeForInputs(input_times) - + inputs_.front()->OutputTime(input_times); + double parallelism = GetParameterValue("parallelism"); + int64 output_time = + NanosPerElementLocked() + ((static_cast(inputs_output_time) / + static_cast(inputs_.size() - 1)) / + parallelism); + return std::max(0LL, + output_time - input_times->at(input_times->size() - 2)); + } case Type::PARALLEL_INTERLEAVE_V2: { // TODO(jsimsa): model the first input if (inputs_.size() <= 1) { return NanosPerElementLocked(); } - int64 delta = - static_cast(static_cast(NanosPerElementLocked()) * - static_cast(inputs_.size() - 1)); + 
int64 delta = static_cast(NanosPerElementLocked()) * + static_cast(inputs_.size() - 1); input_times->push_back(delta); auto cleanup = gtl::MakeCleanup([input_times]() { input_times->pop_back(); }); int64 inputs_output_time = OutputTimeForInputs(input_times) - inputs_.front()->OutputTime(input_times); - double parallelism = std::min(port::NumSchedulableCPUs(), - static_cast(metadata_["parallelism"])); + double parallelism = + std::min(static_cast(GetParameterValue("cycle_length")), + static_cast(GetParameterValue("parallelism"))); int64 output_time = NanosPerElementLocked() + ((static_cast(inputs_output_time) / static_cast(inputs_.size() - 1)) / @@ -206,8 +208,9 @@ int64 Node::OutputTimeLocked(std::vector* input_times) { output_time - input_times->at(input_times->size() - 2)); } case Type::PARALLEL_MAP: { - double parallelism = std::min(port::NumSchedulableCPUs(), - static_cast(metadata_["parallelism"])); + double parallelism = + std::min(port::NumSchedulableCPUs(), + static_cast(GetParameterValue("parallelism"))); int64 delta = static_cast( static_cast(NanosPerElementLocked()) / parallelism); input_times->push_back(delta); @@ -248,23 +251,6 @@ int64 Node::OutputTimeLocked(std::vector* input_times) { } } -Model::Model(const proto::Model& model_proto) { - id_counter_ = model_proto.id_counter(); - std::map> lookup_table; - for (auto node_proto : model_proto.node()) { - std::shared_ptr node(new Node(node_proto)); - lookup_table[node_proto.id()] = node; - } - for (auto node_proto : model_proto.node()) { - std::shared_ptr node = lookup_table[node_proto.id()]; - for (int64 id : node_proto.input()) { - node->add_input(lookup_table[id]); - } - node->set_output(lookup_table[node_proto.output()]); - } - output_ = lookup_table[model_proto.output()]; -} - std::shared_ptr Model::AddNode(const string& name, const string& output_name) { mutex_lock l(mu_); @@ -294,94 +280,77 @@ std::shared_ptr Model::LookupNode(const string& name) { return result; } -void Model::Optimize() { - 
mutex_lock l(mu_); - int64 processing_time = ProcessingTime(); - int64 num_cpus = port::NumSchedulableCPUs(); - std::vector knobs = CollectKnobs(); - // The optimization algorithm starts by setting all parallelism knobs to 1. It - // then repeatedly identifies the knob that, when turned up by 1, decreases - // the output time the most. This process is repeated until all knobs reach - // the number of schedulable CPUs or the projected output time is less than or - // equal to the processing time needed to produce an element divided by the - // number of schedulable CPUs. - for (auto& knob : knobs) { - LOG(INFO) << knob.node->name() << " " << knob.processing_time; - knob.value = 1; - knob.node->set_metadata("parallelism", knob.value); - } - while (true) { - int64 output_time = OutputTime(); - bool all_knobs = true; - for (auto knob : knobs) { - if (knob.value < num_cpus) { - all_knobs = false; +// The optimization algorithm starts by setting all tunable parallelism +// parameters to 1. It then repeatedly identifies the parameter that whose +// increase in parallelism decreases the output time the most. This process is +// repeated until all parameters reach their maximum values or the +// projected output time is less than or equal to the processing time needed to +// produce an element divided by CPU budget. 
+void Model::Optimize(int64 cpu_budget) { + mutex_lock l(optimization_mu_); + std::vector> tunables; + { + mutex_lock l2(mu_); + const int64 processing_time = ProcessingTime(); + tunables = CollectTunables(); + for (auto tunable : tunables) { + tunable->value = 1; + } + while (true) { + const int64 output_time = OutputTime(); + bool all_tunables = true; + for (auto& tunable : tunables) { + if (tunable->value < tunable->max) { + all_tunables = false; + break; + } + } + if (output_time < processing_time / cpu_budget || all_tunables) { break; } - } - if (output_time < processing_time / num_cpus || all_knobs) { - break; - } - int64 best_delta = -1; - int best_knob = -1; - for (int i = 0; i < knobs.size(); ++i) { - if (knobs[i].value == num_cpus) { - continue; + int64 best_delta = -1; + Node::Tunable* best_tunable = nullptr; + for (auto& tunable : tunables) { + if (tunable->value == tunable->max) { + continue; + } + tunable->value++; + int64 delta = output_time - OutputTime(); + if (delta > best_delta) { + best_delta = delta; + best_tunable = tunable.get(); + } + tunable->value--; } - knobs[i].node->set_metadata("parallelism", knobs[i].value + 1); - int64 delta = output_time - OutputTime(); - if (delta > best_delta) { - best_delta = delta; - best_knob = i; + if (best_tunable) { + // NOTE: This can happen because we are performing the optimization + // while the model data is changing. If this becomes an issue, we should + // look into performing the optimization using a model snapshot. + break; } - knobs[i].node->set_metadata("parallelism", knobs[i].value); + best_tunable->value++; } - knobs[best_knob].value++; - knobs[best_knob].node->set_metadata("parallelism", knobs[best_knob].value); } - for (auto knob : knobs) { - LOG(INFO) << knob.node->name() << " " << knob.value; + // The `set_fn` functions should be invoked without holding a lock to avoid a + // potential deadlock. 
+ for (auto& tunable : tunables) { + tunable->set_fn(tunable->value); } - LOG(INFO) << "output time: " << OutputTime(); - LOG(INFO) << "processing time: " << ProcessingTime(); -} - -void Model::OutputToFile() { - proto::Model model_proto; - ToProto(&model_proto); - string filename; - Env::Default()->LocalTempFilename(&filename); - TF_CHECK_OK(WriteStringToFile(Env::Default(), filename, - model_proto.SerializeAsString())); - LOG(INFO) << filename; } void Model::RemoveNode(const string& prefix) { - mutex_lock l(mu_); + // Nodes are not allowed to be removed when optimization is in progress to + // prevent the optimization from trying to access an iterator that was + // concurrently deleted. + mutex_lock l(optimization_mu_); + mutex_lock l2(mu_); lookup_table_.erase(prefix); } -void Model::ToProto(proto::Model* model_proto) { - mutex_lock l(mu_); - model_proto->set_id_counter(id_counter_); - model_proto->set_output(output_->id()); - AddNodeToProto(output_, model_proto); -} - -// static -void Model::AddNodeToProto(const std::shared_ptr& node, - proto::Model* model_proto) { - proto::Node* node_proto = model_proto->add_node(); - node->ToProto(node_proto); - for (const std::shared_ptr& input : node->inputs()) { - AddNodeToProto(input, model_proto); - } -} - -std::vector Model::CollectKnobs() { - std::vector knobs; - output_->CollectKnobs(&knobs); - return knobs; +std::vector> Model::CollectTunables() { + std::vector> tunables; + output_->CollectTunables(&tunables); + return tunables; } int64 Model::OutputTime() { diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index 98172909bf..f88ec06ef3 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -22,7 +22,6 @@ limitations under the License. 
#include #include -#include "tensorflow/core/framework/model.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" @@ -61,13 +60,10 @@ class Node { public: Node(int64 id, std::shared_ptr output) : id_(id), output_(output) {} - explicit Node(const proto::Node& node_proto) : id_(node_proto.id()) { - name_ = node_proto.name(); - type_ = TypeFromName(node_proto.name()); - processing_time_ = node_proto.processing_time(); - num_elements_ = node_proto.num_elements(); - metadata_.insert(node_proto.metadata().begin(), - node_proto.metadata().end()); + // Adds a constant parameter. + void add_constant_param(const string& name, int64 value) LOCKS_EXCLUDED(mu_) { + mutex_lock l(mu_); + constant_params_[name] = value; } // Records that the node produced an element. @@ -88,6 +84,15 @@ class Node { processing_time_ += delta; } + // Adds a tunable parameter. + void add_tunable_param(const string& name, int64 value, int64 min, int64 max, + std::function&& set_fn) + LOCKS_EXCLUDED(mu_) { + mutex_lock l(mu_); + tunable_params_[name] = + std::make_shared(value, min, max, std::move(set_fn)); + } + // Returns the unique node ID. int64 id() LOCKS_EXCLUDED(mu_) { return id_; } @@ -121,12 +126,6 @@ class Node { inputs_.remove(input); } - // Adds the given key-value pair to the node metadata. - void set_metadata(const string& key, int64 value) LOCKS_EXCLUDED(mu_) { - mutex_lock l(mu_); - metadata_[key] = value; - } - // Sets the node name. void set_name(const string& name) LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); @@ -157,11 +156,16 @@ class Node { } private: - // Represents a performance knob. - struct Knob { - Node* node; - int64 processing_time; + // Represents a tunable parameter. 
+ struct Tunable { + Tunable(int64 value, int64 min, int64 max, + std::function set_fn) + : value(value), min(min), max(max), set_fn(std::move(set_fn)) {} + int64 value; + int64 min; + int64 max; + std::function set_fn; }; enum class Type { @@ -186,8 +190,12 @@ class Node { UNKNOWN, }; - // Collects performance knobs in the subtree rooted in this node. - void CollectKnobs(std::vector* knobs) LOCKS_EXCLUDED(mu_); + // Collects tunable parameters in the subtree rooted in this node. + void CollectTunables(std::vector>* tunables) + LOCKS_EXCLUDED(mu_); + + // Gets a value of the given parameter (tunable or constant). + int64 GetParameterValue(const string& name) EXCLUSIVE_LOCKS_REQUIRED(mu_); // Returns the per-element processing time spent in this node. int64 NanosPerElement() LOCKS_EXCLUDED(mu_) { @@ -238,22 +246,6 @@ class Node { return sum; } - // Serializes the node state into the given proto. - void ToProto(proto::Node* node_proto) LOCKS_EXCLUDED(mu_) { - mutex_lock l(mu_); - node_proto->set_id(id_); - node_proto->set_name(name_); - node_proto->set_num_elements(num_elements_); - node_proto->set_processing_time(processing_time_); - for (const std::shared_ptr& input : inputs_) { - node_proto->add_input(input->id()); - } - if (output_) { - node_proto->set_output(output_->id()); - } - node_proto->mutable_metadata()->insert(metadata_.begin(), metadata_.end()); - } - Type TypeFromName(const string& name) EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (name_ == "Batch") { return Type::BATCH; @@ -319,7 +311,9 @@ class Node { int64 processing_time_ GUARDED_BY(mu_) = 0; int64 num_elements_ GUARDED_BY(mu_) = 0; std::map work_start_ GUARDED_BY(mu_); - std::map metadata_ GUARDED_BY(mu_); + std::map constant_params_ GUARDED_BY(mu_); + // Tunables are shared with the model during optimization. 
+ std::map> tunable_params_ GUARDED_BY(mu_); std::list> inputs_ GUARDED_BY(mu_); std::shared_ptr output_ GUARDED_BY(mu_); @@ -330,21 +324,15 @@ class Node { // for collecting runtime information and optimizing performance. It collects // runtime information about execution of the input pipeline that is used to // create a performance model, which is in turn used to identify optimal values -// of performance knobs. +// of tunable parameters. // // Developers of tf.data transformations are not expected to interact with this // class directly. Boiler plate code for creating the abstract representation of // the input pipeline and collecting runtime information has been added to the // implementation of `DatasetBase` and `DatasetBaseIterator` respectively. -// -// TODO(jsimsa): Add a mechanism for feeding the result of the optimization -// into the input pipeline. class Model { public: Model() = default; - explicit Model(const proto::Model& model_proto); - - ~Model() {} // Returns the model output node. std::shared_ptr output() LOCKS_EXCLUDED(mu_) { @@ -360,30 +348,25 @@ class Model { std::shared_ptr LookupNode(const string& name) LOCKS_EXCLUDED(mu_); // Runs optimization. - void Optimize() LOCKS_EXCLUDED(mu_); - - // Outputs the state of a model to a file. - // - // TODO(jsimsa): Remove this method once the optimization loop is closed. - void OutputToFile() LOCKS_EXCLUDED(mu_); + void Optimize(int64 cpu_budget) LOCKS_EXCLUDED(mu_); // Removes the node identified by the given name. void RemoveNode(const string& prefix) LOCKS_EXCLUDED(mu_); - // Serializes the model state to the given proto. 
- void ToProto(proto::Model* model_proto) LOCKS_EXCLUDED(mu_); - private: - static void AddNodeToProto(const std::shared_ptr& node, - proto::Model* model_proto); - - std::vector CollectKnobs() EXCLUSIVE_LOCKS_REQUIRED(mu_); + std::vector> CollectTunables() + EXCLUSIVE_LOCKS_REQUIRED(mu_); int64 OutputTime() EXCLUSIVE_LOCKS_REQUIRED(mu_); int64 ProcessingTime() EXCLUSIVE_LOCKS_REQUIRED(mu_); + // Used for coordination between different input pipeline threads. mutex mu_; + // Used for preventing iterator deletion when optimization is in progress + // because the optimization may try to update the values of tunable + // parameters. + mutex optimization_mu_ ACQUIRED_BEFORE(mu_); int64 id_counter_ GUARDED_BY(mu_) = 1; std::shared_ptr output_ GUARDED_BY(mu_); std::map> lookup_table_ GUARDED_BY(mu_); diff --git a/tensorflow/core/framework/model.proto b/tensorflow/core/framework/model.proto deleted file mode 100644 index 26000007af..0000000000 --- a/tensorflow/core/framework/model.proto +++ /dev/null @@ -1,30 +0,0 @@ -syntax = "proto3"; - -package tensorflow.data.model.proto; -option cc_enable_arenas = true; - -message Model { - // Counter used for generating new node IDs. - int64 id_counter = 1; - // Nodes of this model. - repeated Node node = 2; - // The ID of the output node. - int64 output = 3; -}; - -message Node { - // The node ID. - int64 id = 1; - // The node name. - string name = 2; - // Input node IDs. - repeated int64 input = 3; - // Output node ID. - int64 output = 4; - // Number of elements produced by the node. - int64 num_elements = 5; - // The CPU time spent by running threads of this node. - int64 processing_time = 6; - // Key-value store for node metadata (e.g. batch size or parallelism). 
- map metadata = 7; -}; diff --git a/tensorflow/core/kernels/data/batch_dataset_op.cc b/tensorflow/core/kernels/data/batch_dataset_op.cc index 887b8c8365..d1db1d7bec 100644 --- a/tensorflow/core/kernels/data/batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/batch_dataset_op.cc @@ -117,7 +117,7 @@ class BatchDatasetOp : public UnaryDatasetOpKernel { : DatasetIterator(params) {} Status Initialize(IteratorContext* ctx) override { - SetMetadata(ctx, "batch_size", dataset()->batch_size_); + AddConstantParameter(ctx, "batch_size", dataset()->batch_size_); return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); } diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 85e49355d3..80efac5d4b 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/tracing.h" namespace tensorflow { @@ -39,7 +40,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { public: explicit MapAndBatchDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx), - graph_def_version_(ctx->graph_def_version()), op_version_(ctx->def().op() == "MapAndBatchDataset" ? 
1 : 2) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); @@ -77,7 +77,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { case 2: OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls", &num_parallel_calls)); - OP_REQUIRES(ctx, num_parallel_calls > 0, + OP_REQUIRES(ctx, + num_parallel_calls > 0 || num_parallel_calls == kAutoTune, errors::InvalidArgument( "num_parallel_calls must be greater than zero.")); break; @@ -190,7 +191,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { class Iterator : public DatasetIterator { public: explicit Iterator(const Params& params) - : DatasetIterator(params) {} + : DatasetIterator(params), + num_parallel_calls_(params.dataset->num_parallel_calls_) {} ~Iterator() override { mutex_lock l(mu_); @@ -204,8 +206,24 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } Status Initialize(IteratorContext* ctx) override { - SetMetadata(ctx, "batch_size", dataset()->batch_size_); - SetMetadata(ctx, "parallelism", dataset()->num_parallel_calls_); + mutex_lock l(mu_); + AddConstantParameter(ctx, "batch_size", dataset()->batch_size_); + if (num_parallel_calls_ == kAutoTune) { + num_parallel_calls_ = 1; + std::function set_fn = [this](int64 value) { + { + mutex_lock l(mu_); + num_parallel_calls_ = value; + } + VLOG(2) << "setting parallelism knob to " << value; + cond_var_.notify_all(); + }; + AddTunableParameter( + ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */, + port::NumSchedulableCPUs() /* max */, std::move(set_fn)); + } else { + AddConstantParameter(ctx, "parallelism", num_parallel_calls_); + } TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); return dataset()->captured_func_->Instantiate(ctx); @@ -428,7 +446,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } int MaxBatchResults() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - return (dataset()->num_parallel_calls_ + 
dataset()->batch_size_ - 1) / + return (num_parallel_calls_ + dataset()->batch_size_ - 1) / dataset()->batch_size_; } @@ -480,15 +498,18 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { void RunnerThread(const std::shared_ptr& ctx) LOCKS_EXCLUDED(mu_) { std::vector, int64>> new_calls; - new_calls.reserve(dataset()->num_parallel_calls_); StartWork(ctx.get()); auto stop_cleanup = gtl::MakeCleanup([this, &ctx]() { StopWork(ctx.get()); }); + { + tf_shared_lock l(mu_); + new_calls.reserve(num_parallel_calls_); + } while (true) { { mutex_lock l(mu_); while (!cancelled_ && - (num_calls_ >= dataset()->num_parallel_calls_ || + (num_calls_ >= num_parallel_calls_ || batch_results_.size() > MaxBatchResults() || (batch_results_.size() == MaxBatchResults() && call_counter_ % dataset()->batch_size_ == 0))) { @@ -501,7 +522,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { return; } - while (num_calls_ < dataset()->num_parallel_calls_ && + while (num_calls_ < num_parallel_calls_ && (batch_results_.size() < MaxBatchResults() || (batch_results_.size() == MaxBatchResults() && call_counter_ % dataset()->batch_size_ != 0))) { @@ -648,6 +669,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { // user specified level of parallelism and there are slots available in // the `batch_results_` buffer. condition_variable cond_var_; + // Identifies the maximum number of parallel calls. + int64 num_parallel_calls_ GUARDED_BY(mu_) = 0; // Counts the number of outstanding calls for this batch. int64 num_calls_ GUARDED_BY(mu_) = 0; // Counts the total number of calls. 
@@ -671,7 +694,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { const Eigen::ThreadPoolDevice* device_; // not owned }; - const int graph_def_version_; const int op_version_; DataTypeVector output_types_; std::vector output_shapes_; diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc index c7f929dbc1..63025d3371 100644 --- a/tensorflow/core/kernels/data/model_dataset_op.cc +++ b/tensorflow/core/kernels/data/model_dataset_op.cc @@ -17,11 +17,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/dataset.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/cpu_info.h" namespace tensorflow { namespace data { namespace { +const int kOptimizationPeriodThresholdMs = 60 * EnvTime::kSecondsToMicros; + class ModelDatasetOp : public UnaryDatasetOpKernel { public: explicit ModelDatasetOp(OpKernelConstruction* ctx) @@ -71,9 +74,8 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { class Iterator : public DatasetIterator { public: explicit Iterator(const Params& params) - : DatasetIterator(params), model_(new model::Model()) {} - - ~Iterator() override { model_->OutputToFile(); } + : DatasetIterator(params), + model_(std::make_shared()) {} Status Initialize(IteratorContext* ctx) override { IteratorContext ctx_with_model(CreateParams(ctx)); @@ -85,6 +87,21 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { std::vector* out_tensors, bool* end_of_sequence) override { mutex_lock l(mu_); + int64 now = ctx->env()->NowMicros() / EnvTime::kMillisToMicros; + if (last_optimization_ms_ + optimization_period_ms_ < now) { + model_->Optimize(port::NumSchedulableCPUs()); + // Exponentially increase the period of running the optimization until + // a threshold is reached. 
+ if (optimization_period_ms_ < kOptimizationPeriodThresholdMs) { + if (optimization_period_ms_ << 1 < kOptimizationPeriodThresholdMs) { + optimization_period_ms_ <<= 1; + } else { + optimization_period_ms_ = kOptimizationPeriodThresholdMs; + } + } + last_optimization_ms_ = + ctx->env()->NowMicros() / EnvTime::kMillisToMicros; + } IteratorContext ctx_with_model(CreateParams(ctx)); return input_impl_->GetNext(&ctx_with_model, out_tensors, end_of_sequence); @@ -113,6 +130,8 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { private: mutex mu_; std::shared_ptr model_; + int64 last_optimization_ms_ GUARDED_BY(mu_) = 0; + int64 optimization_period_ms_ GUARDED_BY(mu_) = 10; std::unique_ptr input_impl_ GUARDED_BY(mu_); }; diff --git a/tensorflow/core/kernels/data/padded_batch_dataset_op.cc b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc index 73eeafd797..7b01c3b4e0 100644 --- a/tensorflow/core/kernels/data/padded_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/padded_batch_dataset_op.cc @@ -207,7 +207,7 @@ class PaddedBatchDatasetOp : public UnaryDatasetOpKernel { : DatasetIterator(params) {} Status Initialize(IteratorContext* ctx) override { - SetMetadata(ctx, "batch_size", dataset()->batch_size_); + AddConstantParameter(ctx, "batch_size", dataset()->batch_size_); return dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_); } diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index aa5e613e24..2f2db09508 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -252,7 +252,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { } Status Initialize(IteratorContext* ctx) override { - SetMetadata(ctx, "parallelism", dataset()->cycle_length_); + AddConstantParameter(ctx, "parallelism", dataset()->cycle_length_); TF_RETURN_IF_ERROR( 
dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); return dataset()->captured_func_->Instantiate(ctx); @@ -1120,7 +1120,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { int64 num_parallel_calls; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls", &num_parallel_calls)); - OP_REQUIRES(ctx, num_parallel_calls > 0, + OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune, errors::InvalidArgument( "num_parallel_calls must be greater than zero.")); OP_REQUIRES( @@ -1233,6 +1233,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { args_list_(params.dataset->cycle_length_), current_elements_(params.dataset->cycle_length_), element_in_use_(params.dataset->cycle_length_, false), + num_parallel_calls_(params.dataset->num_parallel_calls_), thread_pool_(new thread::ThreadPool( Env::Default(), ThreadOptions(), "parallel_interleave", dataset()->cycle_length_ /* num_threads */, @@ -1250,7 +1251,24 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { } Status Initialize(IteratorContext* ctx) override { - SetMetadata(ctx, "parallelism", dataset()->num_parallel_calls_); + mutex_lock l(mu_); + if (num_parallel_calls_ == kAutoTune) { + num_parallel_calls_ = 1; + auto set_fn = [this](int64 value) { + { + mutex_lock l(mu_); + num_parallel_calls_ = value; + } + VLOG(2) << "setting parallelism knob to " << value; + cond_var_.notify_all(); + }; + AddTunableParameter( + ctx, "parallelism", num_parallel_calls_ /* value */, 1 /* min */, + dataset()->cycle_length_ /* max */, std::move(set_fn)); + } else { + AddConstantParameter(ctx, "parallelism", num_parallel_calls_); + } + AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_); TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); return dataset()->captured_func_->Instantiate(ctx); @@ -1459,7 +1477,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { // not in use and there 
is space in the `invocation_results_` queue. while (!cancelled_ && (!end_of_input_ || num_open_ > 0) && (element_in_use_[cycle_index_] || - num_calls_ >= dataset()->num_parallel_calls_ || + num_calls_ >= num_parallel_calls_ || invocation_results_.size() >= MaxInvocationResults())) { StopWork(ctx.get()); cond_var_.wait(l); @@ -1472,7 +1490,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { while (!element_in_use_[cycle_index_] && (!end_of_input_ || num_open_ > 0) && - num_calls_ < dataset()->num_parallel_calls_ && + num_calls_ < num_parallel_calls_ && invocation_results_.size() < MaxInvocationResults()) { if (!current_elements_[cycle_index_]) { // Try to create a new iterator from the next input element. @@ -1647,6 +1665,9 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { // Identifies the number of open iterators. int64 num_open_ GUARDED_BY(mu_) = 0; + // Identifies the maximum number of parallel calls. + int64 num_parallel_calls_ GUARDED_BY(mu_) = 0; + // Identifies the number of outstanding calls. 
int64 num_calls_ GUARDED_BY(mu_) = 0; diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 0795987431..b584316d69 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -55,7 +55,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { int32 num_parallel_calls; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls", &num_parallel_calls)); - OP_REQUIRES(ctx, num_parallel_calls > 0, + OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune, errors::InvalidArgument( "num_parallel_calls must be greater than zero.")); diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index 0b6e587881..5f6052ce83 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/platform/cpu_info.h" namespace tensorflow { namespace data { @@ -55,7 +56,25 @@ class ParallelMapIterator : public DatasetBaseIterator { } Status Initialize(IteratorContext* ctx) override { - SetMetadata(ctx, "parallelism", num_parallel_calls_); + mutex_lock l(mu_); + if (num_parallel_calls_ == kAutoTune) { + num_parallel_calls_ = 1; + auto set_fn = [this](int64 value) { + { + mutex_lock l(mu_); + num_parallel_calls_ = value; + } + VLOG(2) << "setting parallelism knob to " << value; + cond_var_.notify_all(); + }; + // TODO(jsimsa): Surface the number of threads used by `ctx->runner()` and + // use it here for the maximum. 
+ AddTunableParameter(ctx, "parallelism", num_parallel_calls_ /* value */, + 1 /* min */, port::NumSchedulableCPUs() /* max */, + std::move(set_fn)); + } else { + AddConstantParameter(ctx, "parallelism", num_parallel_calls_); + } TF_RETURN_IF_ERROR( input_dataset_->MakeIterator(ctx, prefix(), &input_impl_)); if (init_func_) { @@ -211,8 +230,6 @@ class ParallelMapIterator : public DatasetBaseIterator { std::move(done)); } - int64 MaxInvocationResults() { return num_parallel_calls_; } - Status ProcessResult(const std::shared_ptr& result, std::vector* out_tensors, bool* end_of_sequence) { @@ -235,13 +252,16 @@ class ParallelMapIterator : public DatasetBaseIterator { StartWork(ctx.get()); auto cleanup = gtl::MakeCleanup([this, ctx] { StopWork(ctx.get()); }); std::vector> new_calls; - new_calls.reserve(num_parallel_calls_); + { + tf_shared_lock l(mu_); + new_calls.reserve(num_parallel_calls_); + } while (true) { { mutex_lock l(mu_); while (!cancelled_ && (num_calls_ >= num_parallel_calls_ || - invocation_results_.size() >= MaxInvocationResults())) { + invocation_results_.size() >= num_parallel_calls_)) { StopWork(ctx.get()); cond_var_.wait(l); StartWork(ctx.get()); @@ -250,7 +270,7 @@ class ParallelMapIterator : public DatasetBaseIterator { return; } while (num_calls_ < num_parallel_calls_ && - invocation_results_.size() < MaxInvocationResults()) { + invocation_results_.size() < num_parallel_calls_) { invocation_results_.emplace_back(new InvocationResult()); new_calls.push_back(invocation_results_.back()); num_calls_++; @@ -305,7 +325,6 @@ class ParallelMapIterator : public DatasetBaseIterator { const DatasetBase* const input_dataset_; // Not owned. const std::function init_func_; const ParallelMapIteratorFunction map_func_; - const int32 num_parallel_calls_; // Used for coordination between the main thread and the runner thread. mutex mu_; // Used for coordination between the main thread and the runner thread. 
In @@ -314,6 +333,8 @@ class ParallelMapIterator : public DatasetBaseIterator { // parallelism and there are slots available in the `invocation_results_` // buffer. condition_variable cond_var_; + // Identifies the maximum number of parallel calls. + int64 num_parallel_calls_ GUARDED_BY(mu_) = 0; // Counts the number of outstanding calls. int64 num_calls_ GUARDED_BY(mu_) = 0; std::unique_ptr input_impl_; -- GitLab From 422158776bcd9ffbde485610fdd3af498a2d5669 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 17 Sep 2018 09:43:24 -0700 Subject: [PATCH 0260/1357] Increase tolerance in linalg_grad_test to fix #19935 Fixes #19935 PiperOrigin-RevId: 213286535 --- tensorflow/python/kernel_tests/linalg_grad_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py index cd6a34d657..e52f303fe0 100644 --- a/tensorflow/python/kernel_tests/linalg_grad_test.py +++ b/tensorflow/python/kernel_tests/linalg_grad_test.py @@ -120,7 +120,7 @@ def _GetMatrixBinaryFunctorGradientTest(functor_, delta = epsilon**(1.0 / 3.0) # tolerance obtained by looking at actual differences using # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build - tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.04 + tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.05 # The gradients for a and b may be of very different magnitudes, # so to not get spurious failures we test them separately. for factor, factor_init in [a, a_np], [b, b_np]: -- GitLab From 7820ead0c58c9d90d7776bea31a294bbcc9a30f8 Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 30 Jul 2018 09:46:05 -0500 Subject: [PATCH 0261/1357] Make full model before calling set_model on callback Commit 1b67ccbe8006eacffd268553abd01310e8b187d6 removed the _make_train_function calls from Keras training fit_generator for eager execution. 
This breaks some callbacks that depend on the entire model to be populated on the set_model or on_train_begin methods. This commit adds the method calls back in but guarded by an eager check. It is not doing a revert / fix because the fix that removed the calls also put a test case in for eager fit_generator testing which we want to retain. --- tensorflow/python/keras/engine/training_generator.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py index 413c1f4fba..2e074699da 100644 --- a/tensorflow/python/keras/engine/training_generator.py +++ b/tensorflow/python/keras/engine/training_generator.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np +from tensorflow.python.eager import context from tensorflow.python.keras import callbacks as cbks from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer @@ -48,6 +49,10 @@ def fit_generator(model, epoch = initial_epoch do_validation = bool(validation_data) + if not context.executing_eagerly(): + model._make_train_function() + if do_validation: + model._make_test_function() is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: @@ -233,6 +238,9 @@ def evaluate_generator(model, use_multiprocessing=False, verbose=0): """See docstring for `Model.evaluate_generator`.""" + if not context.executing_eagerly(): + model._make_test_function() + if hasattr(model, 'metrics'): for m in model.stateful_metric_functions: m.reset_states() @@ -342,6 +350,9 @@ def predict_generator(model, use_multiprocessing=False, verbose=0): """See docstring for `Model.predict_generator`.""" + if not context.executing_eagerly(): + model._make_test_function() + steps_done = 0 wait_time = 0.01 all_outs = [] -- GitLab From 66575e0537ba8952de8ebc45d45d1b9e4ba1b6ba Mon Sep 17 00:00:00 
2001 From: Samuel Matzek Date: Thu, 2 Aug 2018 13:39:48 -0500 Subject: [PATCH 0262/1357] Add unit test for fit_generator changes Add unit test for fit_generator change for callbacks. --- .../python/keras/engine/training_test.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 30be4131a4..465b4ad65f 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import metrics as metrics_module +from tensorflow.python.keras import callbacks from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras.utils.generic_utils import slice_arrays @@ -1190,6 +1191,37 @@ class TestGeneratorMethods(test.TestCase): use_multiprocessing=False, workers=0) + def test_fit_generator_with_callback(self): + model = keras.Sequential() + model.add(keras.layers.Dense(4, input_shape=(3,))) + optimizer = RMSPropOptimizer(learning_rate=0.001) + model.compile(optimizer, 'mse', metrics=['mae']) + + x = np.random.random((10, 3)) + y = np.random.random((10, 4)) + + def iterator(): + while 1: + yield x, y + + class TestCallback(callbacks.Callback): + def set_model(self, model): + # Check the model operations for the optimizer operations that + # the _make_train_function adds under a named scope for the + # optimizer. This ensurs the full model is populated before the + # set_model callback is called. 
+ optimizer_name_scope = 'training/TFOptimizer/' + graph_def = ops.get_default_graph().as_graph_def() + for node in graph_def.node: + if node.name.startswith(optimizer_name_scope): + return + raise RuntimeError('The optimizer operations are not present in the ' + 'model graph when the Callback.set_model function ' + 'is called') + + model.fit_generator(iterator(), steps_per_epoch=3, epochs=1, + callbacks=[TestCallback()]) + def test_generator_methods_with_sample_weights(self): arr_data = np.random.random((50, 2)) arr_labels = np.random.random((50,)) -- GitLab From da3ccfda9b75f3cf60eb237d9a4da68c436e9f18 Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 17 Sep 2018 11:59:14 -0500 Subject: [PATCH 0263/1357] Move test to callbacks_test --- tensorflow/python/keras/callbacks_test.py | 40 +++++++++++++++++++ .../python/keras/engine/training_test.py | 31 -------------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index b6fae19823..28f7614463 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -30,6 +30,7 @@ import numpy as np from tensorflow.core.framework import summary_pb2 from tensorflow.python import keras +from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util from tensorflow.python.keras import testing_utils @@ -1222,6 +1223,45 @@ class KerasCallbacksTest(test.TestCase): callbacks=cbks, epochs=1) + def test_fit_generator_with_callback(self): + + class TestCallback(keras.callbacks.Callback): + def set_model(self, model): + # Check the model operations for the optimizer operations that + # the _make_train_function adds under a named scope for the + # optimizer. This ensurs the full model is populated before the + # set_model callback is called. 
+ optimizer_name_scope = 'training/' + model.optimizer.__class__.__name__ + graph_def = ops.get_default_graph().as_graph_def() + for node in graph_def.node: + if node.name.startswith(optimizer_name_scope): + return + raise RuntimeError('The optimizer operations are not present in the ' + 'model graph when the Callback.set_model function ' + 'is called') + np.random.seed(1337) + + def generator(): + x = np.random.randn(10, 100).astype(np.float32) + y = np.random.randn(10, 10).astype(np.float32) + while True: + yield x, y + + with self.cached_session(): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=10, input_dim=100) + model.compile( + loss='categorical_crossentropy', + optimizer='sgd', + metrics=['accuracy']) + model.fit_generator( + generator(), + steps_per_epoch=2, + epochs=1, + validation_data=generator(), + validation_steps=2, + callbacks=[TestCallback()], + verbose=0) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 465b4ad65f..d8510c1f23 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1191,37 +1191,6 @@ class TestGeneratorMethods(test.TestCase): use_multiprocessing=False, workers=0) - def test_fit_generator_with_callback(self): - model = keras.Sequential() - model.add(keras.layers.Dense(4, input_shape=(3,))) - optimizer = RMSPropOptimizer(learning_rate=0.001) - model.compile(optimizer, 'mse', metrics=['mae']) - - x = np.random.random((10, 3)) - y = np.random.random((10, 4)) - - def iterator(): - while 1: - yield x, y - - class TestCallback(callbacks.Callback): - def set_model(self, model): - # Check the model operations for the optimizer operations that - # the _make_train_function adds under a named scope for the - # optimizer. This ensurs the full model is populated before the - # set_model callback is called. 
- optimizer_name_scope = 'training/TFOptimizer/' - graph_def = ops.get_default_graph().as_graph_def() - for node in graph_def.node: - if node.name.startswith(optimizer_name_scope): - return - raise RuntimeError('The optimizer operations are not present in the ' - 'model graph when the Callback.set_model function ' - 'is called') - - model.fit_generator(iterator(), steps_per_epoch=3, epochs=1, - callbacks=[TestCallback()]) - def test_generator_methods_with_sample_weights(self): arr_data = np.random.random((50, 2)) arr_labels = np.random.random((50,)) -- GitLab From 3fe9c54b6181bc2bbfa535b28ecb7d3b74342bd8 Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 17 Sep 2018 12:13:15 -0500 Subject: [PATCH 0264/1357] Remove unnecessary import of callbacks --- tensorflow/python/keras/engine/training_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index d8510c1f23..30be4131a4 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -31,7 +31,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import metrics as metrics_module -from tensorflow.python.keras import callbacks from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras.utils.generic_utils import slice_arrays -- GitLab From 531d08bd10125b83030f1165d8562e23b20f4941 Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Mon, 17 Sep 2018 10:37:20 -0700 Subject: [PATCH 0265/1357] Minor docstring change: update link to saved_model_cli. 
PiperOrigin-RevId: 213296537 --- tensorflow/python/tools/saved_model_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index d8ba13d8d2..3dbccd1409 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -15,7 +15,7 @@ """Command-line interface to inspect and execute a graph in a SavedModel. For detailed usages and examples, please refer to: -https://www.tensorflow.org/guide/saved_model_cli +https://www.tensorflow.org/guide/saved_model#cli_to_inspect_and_execute_savedmodel """ -- GitLab From 12718f0204bad8aaa3984c7a176914451eb0bbab Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 17 Sep 2018 13:24:29 -0500 Subject: [PATCH 0266/1357] Fix pylint error --- tensorflow/python/keras/callbacks_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 28f7614463..467bc4cdc4 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1234,8 +1234,8 @@ class KerasCallbacksTest(test.TestCase): optimizer_name_scope = 'training/' + model.optimizer.__class__.__name__ graph_def = ops.get_default_graph().as_graph_def() for node in graph_def.node: - if node.name.startswith(optimizer_name_scope): - return + if node.name.startswith(optimizer_name_scope): + return raise RuntimeError('The optimizer operations are not present in the ' 'model graph when the Callback.set_model function ' 'is called') -- GitLab From e576073771a7484ec27f876963bf731d33b83e38 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 17 Sep 2018 11:22:36 -0700 Subject: [PATCH 0267/1357] [Java]: Release 1.11.0-rc0 PiperOrigin-RevId: 213305616 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- 
tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/spark-tensorflow-connector/pom.xml | 2 +- tensorflow/java/maven/tensorflow-hadoop/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index f9093ce385..cf6a64daeb 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.10.0 + 1.11.0-rc0 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index 1208956dec..978c3cbf6d 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.10.0 + 1.11.0-rc0 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index 755449cb3c..d1378b5d56 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.10.0 + 1.11.0-rc0 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index e1bf2c7dba..1342b0e9bb 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.10.0 + 1.11.0-rc0 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index b89f042567..19ff65a095 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.10.0 + 1.11.0-rc0 ../ proto diff --git 
a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml index 1b7995be2c..ba7e9f4c69 100644 --- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml @@ -6,7 +6,7 @@ org.tensorflow spark-tensorflow-connector_2.11 jar - 1.10.0 + 1.11.0-rc0 spark-tensorflow-connector https://www.tensorflow.org TensorFlow TFRecord connector for Apache Spark DataFrames diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml index 0fe6f4dce4..f913faffa2 100644 --- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml +++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml @@ -5,7 +5,7 @@ org.tensorflow tensorflow-hadoop jar - 1.10.0 + 1.11.0-rc0 tensorflow-hadoop https://www.tensorflow.org TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index 0de90244b1..f6cb595885 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.10.0 + 1.11.0-rc0 ../ tensorflow -- GitLab From deec3bf519bd51f743db15ae28a6335d43ad5dfe Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 17 Sep 2018 11:36:50 -0700 Subject: [PATCH 0268/1357] Fix and complete StreamExecutor's DoFusedConvolve: * bias_nd is set to have CUDNN_DATA_FLOAT, even though BiasType is not float. * double is supported but not exposed through the public interface. * DoFusedConvolveImpl has duplicated information in its template parameter list. 
PiperOrigin-RevId: 213308435 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 54 ++++++++++++--------- tensorflow/stream_executor/cuda/cuda_dnn.h | 16 +++--- tensorflow/stream_executor/stream.cc | 38 +++++++++++++++ 3 files changed, 77 insertions(+), 31 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 3c533c7f99..63ab367086 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -149,6 +149,16 @@ cudnnDataType_t GetCudnnDataType() { return CUDNN_DATA_HALF; } +template <> +cudnnDataType_t GetCudnnDataType() { + return CUDNN_DATA_INT8; +} + +template <> +cudnnDataType_t GetCudnnDataType() { + return CUDNN_DATA_INT32; +} + // RAII wrapper for all calls to cuDNN with a cuDNN handle argument. // // See CudnnAccess::GetHandle() for details. @@ -2486,19 +2496,19 @@ port::Status CudnnSupport::DoConvolveImpl( return port::Status::OK(); } -template +template port::Status CudnnSupport::DoFusedConvolveImpl( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, - const DeviceMemory& conv_input_data, ScaleType conv_input_scale, - const dnn::FilterDescriptor& filter_descriptor, - const DeviceMemory& filter_data, + const DeviceMemory& conv_input_data, + ScaleType conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, - const DeviceMemory& side_input_data, ScaleType side_input_scale, - const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory& side_input_data, + ScaleType side_input_scale, const dnn::BatchDescriptor& bias_descriptor, const DeviceMemory& biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor& output_descriptor, - DeviceMemory* output_data, ScratchAllocator* scratch_allocator, + DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, 
dnn::ProfileResult* output_profile_result) { if (activation_mode != dnn::ActivationMode::kRelu && @@ -2508,15 +2518,15 @@ port::Status CudnnSupport::DoFusedConvolveImpl( "Relu or None activation."); } - CudnnTensorDescriptor conv_input_nd( - conv_input_descriptor, static_cast(cudnn_data_type)); - CudnnTensorDescriptor output_nd( - output_descriptor, static_cast(cudnn_data_type)); + CudnnTensorDescriptor conv_input_nd(conv_input_descriptor, + GetCudnnDataType()); + CudnnTensorDescriptor output_nd(output_descriptor, + GetCudnnDataType()); CudnnFilterDescriptor filter(filter_descriptor, - static_cast(cudnn_data_type)); - CudnnTensorDescriptor bias_nd(bias_descriptor, CUDNN_DATA_FLOAT); - CudnnConvolutionDescriptor conv( - convolution_descriptor, static_cast(cudnn_compute_type)); + GetCudnnDataType()); + CudnnTensorDescriptor bias_nd(bias_descriptor, GetCudnnDataType()); + CudnnConvolutionDescriptor conv(convolution_descriptor, + GetCudnnDataType()); auto cudnn = cudnn_->GetHandle(parent_, stream); @@ -2933,8 +2943,7 @@ bool CudnnSupport::DoFusedConvolve( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return IsStatusOk( - DoFusedConvolveImpl( + DoFusedConvolveImpl( stream, conv_input_descriptor, conv_input_data, conv_input_scale, filter_descriptor, filter_data, convolution_descriptor, side_input_data, side_input_scale, bias_descriptor, biases, @@ -2957,8 +2966,7 @@ bool CudnnSupport::DoFusedConvolve( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return IsStatusOk( - DoFusedConvolveImpl( + DoFusedConvolveImpl( stream, conv_input_descriptor, conv_input_data, conv_input_scale, filter_descriptor, filter_data, convolution_descriptor, side_input_data, side_input_scale, bias_descriptor, biases, @@ -2982,8 +2990,7 @@ bool CudnnSupport::DoFusedConvolve( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return IsStatusOk( - DoFusedConvolveImpl( + 
DoFusedConvolveImpl( stream, conv_input_descriptor, conv_input_data, conv_input_scale, filter_descriptor, filter_data, convolution_descriptor, side_input_data, side_input_scale, bias_descriptor, biases, @@ -3014,8 +3021,7 @@ bool CudnnSupport::DoFusedConvolve( return false; } return IsStatusOk( - DoFusedConvolveImpl( + DoFusedConvolveImpl( stream, conv_input_descriptor, conv_input_data, conv_input_scale, filter_descriptor, filter_data, convolution_descriptor, side_input_data, side_input_scale, bias_descriptor, biases, diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index 9d88f971bb..74f6f935b8 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -674,19 +674,21 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result); - template + template port::Status DoFusedConvolveImpl( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, - const DeviceMemory& conv_input_data, ScaleType conv_input_scale, + const DeviceMemory& conv_input_data, + ScaleType conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, - const DeviceMemory& filter_data, + const DeviceMemory& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, - const DeviceMemory& side_input_data, ScaleType side_input_scale, - const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory& side_input_data, + ScaleType side_input_scale, const dnn::BatchDescriptor& bias_descriptor, const DeviceMemory& biases, dnn::ActivationMode activation_mode, const dnn::BatchDescriptor& output_descriptor, - DeviceMemory* output_data, ScratchAllocator* scratch_allocator, + DeviceMemory* output_data, + ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result); diff --git a/tensorflow/stream_executor/stream.cc 
b/tensorflow/stream_executor/stream.cc index 19d3b2389a..69558fd14b 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -585,6 +585,44 @@ Stream &Stream::ThenConvolveWithScratch( return *this; } +Stream &Stream::ThenFusedConvolveWithAlgorithm( + const dnn::BatchDescriptor &conv_input_descriptor, + const DeviceMemory &conv_input_data, double conv_input_scale, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const DeviceMemory &side_input_data, double side_input_scale, + const dnn::BatchDescriptor &bias_descriptor, + const DeviceMemory &biases, dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory *output, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result) { + VLOG_CALL(PARAM(conv_input_descriptor), PARAM(conv_input_data), + PARAM(conv_input_scale), PARAM(filter_descriptor), + PARAM(filter_data), PARAM(convolution_descriptor), PARAM(biases), + PARAM(side_input_data), PARAM(side_input_scale), + PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), + PARAM(algorithm_config)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, + side_input_data, side_input_scale, bias_descriptor, biases, + activation_mode, output_descriptor, output, scratch_allocator, + algorithm_config, output_profile_result); + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + Stream &Stream::ThenFusedConvolveWithAlgorithm( const dnn::BatchDescriptor &conv_input_descriptor, const DeviceMemory &conv_input_data, float conv_input_scale, -- GitLab From 
f9cf0e5496569d4a9a1edb25fba1d6afd6dab6b7 Mon Sep 17 00:00:00 2001 From: Alan Chiao Date: Mon, 17 Sep 2018 12:07:28 -0700 Subject: [PATCH 0269/1357] Numerics tweak to symmetric quantization. PiperOrigin-RevId: 213314024 --- .../lite/kernels/internal/optimized/neon_tensor_utils.cc | 2 +- .../lite/kernels/internal/reference/portable_tensor_utils.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc index 27418178fd..36c15dbc57 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -457,7 +457,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size, return; } *scaling_factor = range / kScale; - const float scaling_factor_inv = 1.0f / *scaling_factor; + const float scaling_factor_inv = kScale / range; const int postamble_start = size - (size & (2 * kFloatWeightsPerNeonLane - 1)); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc index 77e60adc18..70d25c4bd9 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -55,7 +55,7 @@ void PortableSymmetricQuantizeFloats(const float* values, const int size, return; } *scaling_factor = range / kScale; - const float scaling_factor_inv = 1.0f / *scaling_factor; + const float scaling_factor_inv = kScale / range; for (int i = 0; i < size; ++i) { const int32_t quantized_value = static_cast(TfLiteRound(values[i] * scaling_factor_inv)); -- GitLab From 779d87cfc1421eb6be2f9cc4ae29bca77c8d2929 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Mon, 17 Sep 2018 12:18:48 -0700 Subject: [PATCH 0270/1357] Do not 
segfault in Conv2d/3d if cuDNN version is too low. PiperOrigin-RevId: 213315830 --- tensorflow/core/kernels/conv_ops.cc | 12 +++++++++--- tensorflow/core/kernels/conv_ops_3d.cc | 14 ++++++++++---- tensorflow/core/kernels/conv_ops_gpu.h | 6 +++++- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 6f5c8d8461..717a9f40a9 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -731,9 +731,15 @@ void LaunchConv2DOp::operator()( if (cudnn_use_autotune && !AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config)) { std::vector algorithms; - CHECK(stream->parent()->GetConvolveAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(stream->parent()), - &algorithms)); + OP_REQUIRES( + ctx, + stream->parent()->GetConvolveAlgorithms( + conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), + &algorithms), + errors::Unknown("Failed to get convolution algorithm. 
This is probably " + "because cuDNN failed to initialize, so try looking to " + "see if a warning log message was printed above.")); ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 5c2b88924b..83df4dce38 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -435,10 +435,16 @@ struct LaunchConvOp { if (cudnn_use_autotune && !AutoTuneConv3d::GetInstance()->Find( conv_parameters, &algorithm_config)) { std::vector algorithms; - CHECK(stream->parent()->GetConvolveAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo( - stream->parent()), - &algorithms)); + OP_REQUIRES(ctx, + stream->parent()->GetConvolveAlgorithms( + conv_parameters.ShouldIncludeWinogradNonfusedAlgo( + stream->parent()), + &algorithms), + errors::Unknown( + "Failed to get convolution algorithm. This is probably " + "because cuDNN failed to initialize, so try looking to " + "see if a warning log message was printed above.")); + ProfileResult best_result; ProfileResult best_result_no_scratch; for (auto profile_algorithm : algorithms) { diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index afc611f277..21d135decd 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -142,8 +142,12 @@ class ConvParameters { template bool ShouldIncludeWinogradNonfusedAlgo( se::StreamExecutor* stream_exec) const { + auto* dnn_support = stream_exec->AsDnn(); + if (!dnn_support) { + return false; + } // Skip this check for cuDNN 7 and newer. 
- auto version = stream_exec->AsDnn()->GetVersion(); + auto version = dnn_support->GetVersion(); if (version.ok() && version.ValueOrDie().major_version() >= 7) { return true; } -- GitLab From fbd48c7a8bb088d92988fce4f757d1719e9c57a2 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 17 Sep 2018 12:24:43 -0700 Subject: [PATCH 0271/1357] fix type error within an environment variable name --- tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc | 2 +- tensorflow/core/kernels/mkl_conv_grad_input_ops.cc | 2 +- tensorflow/core/kernels/mkl_conv_ops.cc | 2 +- tensorflow/core/util/mkl_util.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 52157ed5fb..f406ad2ab5 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -853,7 +853,7 @@ class MklConvCustomBackpropFilterOp // MKL DNN allocates large buffers when a conv gradient filter primtive is // created. So we don't cache conv backward primitives when the env - // variable TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is set to true. + // variable TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is set to true. 
bool do_not_cache = MklPrimitiveFactory::IsPrimitiveMemOptEnabled(); conv_bwd_filter = MklConvBwdFilterPrimitiveFactory::Get( convBwdFilterDims, do_not_cache); diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index c38c9cc27c..a501ce2c93 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -713,7 +713,7 @@ class MklConvCustomBackpropInputOp : public MklConvBackpropCommonOp { TFPaddingToMklDnnPadding(this->padding_)); // We don't cache those primitves if the env variable - // TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is true and if primitve descriptor + // TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is true and if primitve descriptor // includes potentialy large buffers. MKL DNN allocates buffers // in the following cases // 1. Legacy CPU without AVX512/AVX2, or diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 184e0cb003..b332edad0a 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -901,7 +901,7 @@ class MklConvOp : public OpKernel { // In some cases, primitve descriptor includes potentialy large buffers, // we don't cache those primitves if the env variable - // TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is true. MKL DNN allocates buffers + // TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is true. MKL DNN allocates buffers // in the following cases // 1. Legacy CPU without AVX512/AVX2, or // 2. 
1x1 convolution with stride != 1 diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 680211edff..5ea8f2ee47 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -2040,7 +2040,7 @@ class MklPrimitiveFactory { /// Fuction to check whether primitive memory optimization is enabled static inline bool IsPrimitiveMemOptEnabled() { bool is_primitive_mem_opt_enabled = true; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE", true, + TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", true, &is_primitive_mem_opt_enabled)); return is_primitive_mem_opt_enabled; } -- GitLab From 0d9868d8f9c01c1402ae99d672599c4bac6e787d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 12:20:03 -0700 Subject: [PATCH 0272/1357] Convert more kernel signatures to use runtime shapes. PiperOrigin-RevId: 213316034 --- .../internal/reference/reference_ops.h | 215 ++++++++++++++---- 1 file changed, 165 insertions(+), 50 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 77927af227..09a4ba7701 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -511,24 +511,25 @@ inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params, } } -inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, - const float* weights_data, - const Dims<4>& weights_dims, const float* bias_data, - const Dims<4>& bias_dims, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { +inline void FullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& weights_shape, + const float* weights_data, const RuntimeShape& bias_shape, + const 
float* bias_data, const RuntimeShape& output_shape, + float* output_data) { + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; // TODO(benoitjacob): This really should be: // const int batches = ArraySize(output_dims, 1); // but the current --variable_batch hack consists in overwriting the 3rd // dimension with the runtime batch size, as we don't keep track for each // array of which dimension is the batch dimension in it. - const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * - ArraySize(output_dims, 3); - const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); - const int accum_depth = ArraySize(weights_dims, 0); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + const int output_dims_count = output_shape.DimensionsCount(); + const int weights_dims_count = weights_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); + const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2, + output_shape, output_dims_count - 1); + const int accum_depth = weights_shape.Dims(weights_dims_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { float total = 0.f; @@ -538,7 +539,7 @@ inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, } float bias_value = 0.0f; if (bias_data) { - bias_value = bias_data[Offset(bias_dims, out_c, 0, 0, 0)]; + bias_value = bias_data[out_c]; } output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax( total + bias_value, output_activation_min, output_activation_max); @@ -546,6 +547,26 @@ inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + tflite::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(weights_dims), weights_data, + DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. // legacy, for compatibility with old checked-in code template void FullyConnected(const float* input_data, const Dims<4>& input_dims, @@ -559,28 +580,35 @@ void FullyConnected(const float* input_data, const Dims<4>& input_dims, output_data, output_dims); } -inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { +inline void FullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + uint8* output_data, gemmlowp::GemmContext* gemm_context) { (void)gemm_context; // only used in optimized code. 
+ const int32 input_offset = params.input_offset; + const int32 filter_offset = params.weights_offset; + const int32 output_offset = params.output_offset; + const int32 output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); // TODO(benoitjacob): This really should be: // const int batches = ArraySize(output_dims, 1); // but the current --variable_batch hack consists in overwriting the 3rd // dimension with the runtime batch size, as we don't keep track for each // array of which dimension is the batch dimension in it. - const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * - ArraySize(output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0); - const int accum_depth = ArraySize(filter_dims, 0); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + const int output_dim_count = output_shape.DimensionsCount(); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2, + output_shape, output_dim_count - 1); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { int32 acc = 0; @@ -590,7 +618,7 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, acc += (filter_val + filter_offset) * (input_val + input_offset); } if (bias_data) { - acc += bias_data[Offset(bias_dims, out_c, 0, 0, 0)]; + acc += bias_data[out_c]; } acc = 
MultiplyByQuantizedMultiplier(acc, output_multiplier, kReverseShift * output_shift); @@ -602,16 +630,47 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, const int32* bias_data, const Dims<4>& bias_dims, int32 output_offset, int32 output_multiplier, int output_shift, int32 output_activation_min, - int32 output_activation_max, int16* output_data, + int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims, gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data, + gemm_context); +} + +inline void FullyConnected( + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + int16* output_data, gemmlowp::GemmContext* gemm_context) { (void)gemm_context; // only used in optimized code. 
+ const int32 input_offset = params.input_offset; + const int32 filter_offset = params.weights_offset; + const int32 output_offset = params.output_offset; + const int32 output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); TFLITE_DCHECK_EQ(output_offset, 0); // TODO(benoitjacob): This really should be: @@ -619,12 +678,12 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, // but the current --variable_batch hack consists in overwriting the 3rd // dimension with the runtime batch size, as we don't keep track for each // array of which dimension is the batch dimension in it. - const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * - ArraySize(output_dims, 3); - const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0); - const int accum_depth = ArraySize(filter_dims, 0); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + const int output_dim_count = output_shape.DimensionsCount(); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2, + output_shape, output_dim_count - 1); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { // Internal accumulation. @@ -651,27 +710,60 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, int16* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data, + gemm_context); +} + inline void ShuffledFullyConnected( - const uint8* input_data, const Dims<4>& input_dims, - const uint8* shuffled_weights_data, const Dims<4>& weights_dims, - const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - int16* output_data, const Dims<4>& output_dims, - uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { + const FullyConnectedParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& weights_shape, + const uint8* shuffled_weights_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + int16* output_data, uint8* shuffled_input_workspace_data, + gemmlowp::GemmContext* gemm_context) { (void)gemm_context; // only used in optimized code. 
- + const int32 output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1); + TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); // TODO(benoitjacob): This really should be: // const int batches = ArraySize(output_dims, 1); // but the current --variable_batch hack consists in overwriting the 3rd // dimension with the runtime batch size, as we don't keep track for each // array of which dimension is the batch dimension in it. - const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * - ArraySize(output_dims, 3); - const int output_depth = MatchingArraySize(weights_dims, 1, output_dims, 0); - const int accum_depth = ArraySize(weights_dims, 0); - TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); - TFLITE_DCHECK(IsPackedWithoutStrides(weights_dims)); + const int output_dim_count = output_shape.DimensionsCount(); + const int weights_dim_count = weights_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2, + output_shape, output_dim_count - 1); + const int accum_depth = weights_shape.Dims(weights_dim_count - 1); TFLITE_DCHECK((accum_depth % 16) == 0); TFLITE_DCHECK((output_depth % 4) == 0); @@ -799,6 +891,29 @@ inline void ShuffledFullyConnected( } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void ShuffledFullyConnected( + const uint8* input_data, const Dims<4>& input_dims, + const uint8* shuffled_weights_data, const Dims<4>& weights_dims, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, + int output_shift, int32 output_activation_min, int32 output_activation_max, + int16* output_data, const Dims<4>& output_dims, + uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(weights_dims), shuffled_weights_data, + DimsToShape(bias_dims), bias_data, + DimsToShape(output_dims), output_data, + shuffled_input_workspace_data, gemm_context); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. 
// legacy, for compatibility with old checked-in code template void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, -- GitLab From 3fe7b38347eaf7f1fb764cc2ac92de0ce7bc51e5 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 17 Sep 2018 12:23:18 -0700 Subject: [PATCH 0273/1357] [XLA] Allow adding extra instructions in HloComputation::CloneWithReplacements PiperOrigin-RevId: 213316504 --- tensorflow/compiler/xla/service/hlo_computation.cc | 8 ++++++-- tensorflow/compiler/xla/service/hlo_computation.h | 5 ++++- tensorflow/compiler/xla/service/while_loop_simplifier.cc | 5 +++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 601a008d9f..e9e70b2c57 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -916,13 +916,14 @@ std::unique_ptr HloComputation::Clone( return CloneWithReplacements( /*replacements=*/std::unordered_map>(), - context, suffix); + /*extras=*/{}, context, suffix); } std::unique_ptr HloComputation::CloneWithReplacements( std::unordered_map> replacements, - HloCloneContext* context, const string& suffix) { + absl::Span extras, HloCloneContext* context, + const string& suffix) { std::unique_ptr context_ptr; if (context == nullptr) { context_ptr = absl::make_unique(parent(), suffix); @@ -944,6 +945,9 @@ std::unique_ptr HloComputation::CloneWithReplacements( VLOG(1) << "Cloning " << name() << " --> " << suffix << "\n"; std::vector postorder; + for (HloInstruction* instr : extras) { + postorder.push_back(instr); + } for (HloInstruction* instr : MakeInstructionPostOrder()) { if (HloInstruction* replacement = replace(instr)) { postorder.push_back(replacement); diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index a880e9ab30..e7c98aae23 100644 --- 
a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -333,10 +333,13 @@ class HloComputation { // // If replacements maps a key to nullptr, we remove that instruction from the // new computation. + // If additional instructions are used by instructions in replacement map, + // they must be passed in post-order in the extras span. std::unique_ptr CloneWithReplacements( std::unordered_map> replacements, - HloCloneContext* context = nullptr, const string& suffix = "clone"); + absl::Span extras, HloCloneContext* context = nullptr, + const string& suffix = "clone"); // Returns true if the given instruction can be removed from the computation. // Parameter instructions cannot be removed without violating invariants of diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 6a7bfe3f12..9a74f22395 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -252,7 +252,7 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { // Create the new while condition, body, and init value. std::unique_ptr new_while_cond = while_cond->CloneWithReplacements( - make_while_computation_replacements(while_cond)); + make_while_computation_replacements(while_cond), /*extras=*/{}); std::unordered_map> while_body_replacements = make_while_computation_replacements(while_body); @@ -265,7 +265,8 @@ static StatusOr TryRemoveDeadWhileParams(HloInstruction* while_op) { while_body_replacements.emplace( while_body_root, HloInstruction::CreateTuple(new_while_body_root_elems)); std::unique_ptr new_while_body = - while_body->CloneWithReplacements(std::move(while_body_replacements)); + while_body->CloneWithReplacements(std::move(while_body_replacements), + /*extras=*/{}); // Add a new while_init instruction that repackages the old while_init // instruction's elements. 
We rely on the AlgebraicSimplifier and DCE to -- GitLab From adae337d05251963ef0905e024dfdc07b6d0aae2 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 17 Sep 2018 12:32:22 -0700 Subject: [PATCH 0274/1357] GradientTape: Documentation formatting tweak. PiperOrigin-RevId: 213318051 --- tensorflow/python/eager/backprop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index be392c7a0f..11336efebb 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -648,8 +648,8 @@ class GradientTape(object): Operations are recorded if they are executed within this context manager and at least one of their inputs is being "watched". - Trainable variables (created by `tf.Variable` or `tf.get_variable`, - trainable=True is default in both cases) are automatically watched. Tensors + Trainable variables (created by `tf.Variable` or `tf.get_variable`, where + `trainable=True` is default in both cases) are automatically watched. Tensors can be manually watched by invoking the `watch` method on this context manager. -- GitLab From de3fa499bb05c595f9e341c7d507b47b8d44ae90 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 17 Sep 2018 12:57:26 -0700 Subject: [PATCH 0275/1357] [XLA] Add ReduceWindow test. PiperOrigin-RevId: 213322116 --- tensorflow/compiler/xla/tests/reduce_window_test.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 63491a90bf..c25ccafaf8 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -1303,11 +1303,19 @@ struct R1ReduceWindowTestData { /*pad_high=*/{0}, /*reducer=*/Reducer::kAdd}, + // The pattern generated by inclusive scan (cumsum/cumprod). 
{/*base_bounds=*/{4096}, /*window_bounds=*/{4096}, /*strides=*/{1}, /*pad_low=*/{4095}, /*pad_high=*/{0}, /*reducer=*/Reducer::kMax}, + + // The pattern generated by exclusive scan (cumsum/cumprod). + {/*base_bounds=*/{4096}, /*window_bounds=*/{4096}, + /*strides=*/{1}, + /*pad_low=*/{4096}, + /*pad_high=*/{0}, + /*reducer=*/Reducer::kMax}, }; string R1ReduceWindowTestDataToString( -- GitLab From 5da7359a9e0b832f608dc66d7a22e647f09ec035 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 13:11:51 -0700 Subject: [PATCH 0276/1357] Raise error on encountering bad indentation during Autograph parsing. PiperOrigin-RevId: 213324570 --- tensorflow/python/autograph/pyct/parser.py | 15 ++++++++++++++- tensorflow/python/autograph/pyct/parser_test.py | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/pyct/parser.py b/tensorflow/python/autograph/pyct/parser.py index 112ed46a1e..63686350d5 100644 --- a/tensorflow/python/autograph/pyct/parser.py +++ b/tensorflow/python/autograph/pyct/parser.py @@ -31,8 +31,21 @@ from tensorflow.python.util import tf_inspect def parse_entity(entity): """Returns the AST of given entity.""" source = tf_inspect.getsource(entity) + # Comments and multiline strings can appear at arbitrary indentation levels, + # causing textwrap.dedent to not correctly dedent source code. + # TODO(b/115884650): Automatic handling of comments/multiline strings. source = textwrap.dedent(source) - return parse_str(source), source + try: + return parse_str(source), source + except IndentationError: + # Because we are parsing the source code of entities that have already + # successfully parsed once, any IndentationErrors are guaranteed to be + # caused by insufficient dedenting. + raise ValueError( + 'Failed to dedent prior to parsing source code. If you have comments ' + 'or multiline strings in your code, try indenting them. 
' + 'Multiline strings can be rewritten using textwrap.dedent.\n' + 'Offending source code: \n %s' % source) def parse_str(src): diff --git a/tensorflow/python/autograph/pyct/parser_test.py b/tensorflow/python/autograph/pyct/parser_test.py index d0b465eb73..d3a7b7a014 100644 --- a/tensorflow/python/autograph/pyct/parser_test.py +++ b/tensorflow/python/autograph/pyct/parser_test.py @@ -42,6 +42,22 @@ class ParserTest(test.TestCase): """)) self.assertEqual('f', mod.body[0].name) + def test_parse_comments(self): + def f(): +# unindented comment + pass + with self.assertRaises(ValueError): + parser.parse_entity(f) + + def test_parse_multiline_strings(self): + def f(): + print(""" +some +multiline +string""") + with self.assertRaises(ValueError): + parser.parse_entity(f) + def test_parse_expression(self): node = parser.parse_expression('a.b') self.assertEqual('a', node.value.id) -- GitLab From 8ae1021b028e9e6cc1b169ffab4dd186b4d2b472 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 13:21:51 -0700 Subject: [PATCH 0277/1357] Move from deprecated self.test_session() to self.cached_session(). self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about: * the fact that the session may be reused. * the session is not closed even when doing a "with self.test_session()" statement. 
PiperOrigin-RevId: 213326167 --- .../kernel_tests/interleave_dataset_op_test.py | 6 +++--- .../data/kernel_tests/map_dataset_op_test.py | 2 +- .../python/kernel_tests/broadcast_to_ops_test.py | 8 ++++---- tensorflow/python/kernel_tests/check_ops_test.py | 10 +++++----- .../kernel_tests/conditional_accumulator_test.py | 4 ++-- .../kernel_tests/regex_full_match_op_test.py | 6 +++--- .../python/kernel_tests/regex_replace_op_test.py | 16 ++++++++-------- .../sparse_conditional_accumulator_test.py | 4 ++-- tensorflow/python/kernel_tests/substr_op_test.py | 14 +++++++------- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py index a35cee594a..e7e51df65e 100644 --- a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py @@ -134,7 +134,7 @@ class InterleaveDatasetTest(test.TestCase, parameterized.TestCase): result.append([value] * value) return result * count - with self.test_session() as sess: + with self.cached_session() as sess: for expected_element in self._interleave( repeat(input_values, count), cycle_length, block_length): self.assertEqual(expected_element, sess.run(get_next)) @@ -169,7 +169,7 @@ class InterleaveDatasetTest(test.TestCase, parameterized.TestCase): num_parallel_calls) get_next = dataset.make_one_shot_iterator().get_next() - with self.test_session() as sess: + with self.cached_session() as sess: for value in input_values: if np.isnan(value): with self.assertRaises(errors.InvalidArgumentError): @@ -195,7 +195,7 @@ class InterleaveDatasetTest(test.TestCase, parameterized.TestCase): init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) for i in range(10): for j in range(2): diff --git 
a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py index 7685d8dbdc..2ab74beb32 100644 --- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py @@ -731,7 +731,7 @@ class MapDatasetTest(test.TestCase, parameterized.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: tids = sess.run(get_next) self.assertTrue(all(tids[0] == tid for tid in tids)) # pylint: enable=g-long-lambda diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py index bd2339f31d..09c325f2bc 100644 --- a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py +++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py @@ -90,7 +90,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase): x = constant_op.constant(1, dtype=dtypes.float32) v = array_ops.broadcast_to(x, [2, 4, 3]) out = 2 * v - with self.test_session(): + with self.cached_session(): err = gradient_checker.compute_gradient_error(x, x.get_shape(), out, out.get_shape()) self.assertLess(err, 1e-4) @@ -100,7 +100,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase): dtype=dtypes.float32) v = array_ops.broadcast_to(x, [2, 5, 3]) out = 2 * v - with self.test_session(): + with self.cached_session(): err = gradient_checker.compute_gradient_error(x, x.get_shape(), out, out.get_shape()) self.assertLess(err, 1e-4) @@ -110,7 +110,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase): dtype=dtypes.float32) v = array_ops.broadcast_to(x, [5, 2, 3]) out = 2 * v - with self.test_session(): + with self.cached_session(): err = gradient_checker.compute_gradient_error(x, x.get_shape(), out, out.get_shape()) self.assertLess(err, 1e-4) @@ -119,7 +119,7 @@ class BroadcastToTest(test_util.TensorFlowTestCase): x = 
constant_op.constant([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32) v = array_ops.broadcast_to(x, [5, 4, 6]) out = 2 * v - with self.test_session(): + with self.cached_session(): err = gradient_checker.compute_gradient_error(x, x.get_shape(), out, out.get_shape()) self.assertLess(err, 1e-4) diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 27a674e223..bd4011d58e 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -785,7 +785,7 @@ class EnsureShapeTest(test.TestCase): derived = math_ops.divide(placeholder, 3, name="MyDivide") derived = check_ops.ensure_shape(derived, (3, 3, 3)) feed_val = [[1], [2]] - with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaisesWithPredicateMatch( errors.InvalidArgumentError, r"Shape of tensor MyDivide \[2,1\] is not compatible with " @@ -797,7 +797,7 @@ class EnsureShapeTest(test.TestCase): derived = placeholder / 3 derived = check_ops.ensure_shape(derived, (None, None, 3)) feed_val = [[1], [2]] - with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaisesWithPredicateMatch( errors.InvalidArgumentError, r"Shape of tensor [A-Za-z_]* \[2,1\] is not compatible with " @@ -809,7 +809,7 @@ class EnsureShapeTest(test.TestCase): derived = placeholder / 3 derived = check_ops.ensure_shape(derived, (2, 1)) feed_val = [[1], [2]] - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(derived, feed_dict={placeholder: feed_val}) def testEnsuresDynamicShape_WithUnknownDims(self): @@ -817,7 +817,7 @@ class EnsureShapeTest(test.TestCase): derived = placeholder / 3 derived = check_ops.ensure_shape(derived, (None, None)) feed_val = [[1], [2]] - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(derived, feed_dict={placeholder: feed_val}) def testGradient(self): @@ -826,7 +826,7 @@ class 
EnsureShapeTest(test.TestCase): gradient = gradients.gradients(derived, placeholder) feed_val = [[4.0], [-1.0]] - with self.test_session() as sess: + with self.cached_session() as sess: gradient_values, = sess.run(gradient, feed_dict={placeholder: feed_val}) expected = [[1.0], [1.0]] diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py index 262352a9af..97ab23fe49 100644 --- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py +++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py @@ -272,7 +272,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(15.0, val) def testAccumulatorTakeGradSum(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", @@ -349,7 +349,7 @@ class ConditionalAccumulatorTest(test.TestCase): self.assertEqual(elems_ave + 0.0, val) def testAccumulatorRepeatedTakeGradSum(self): - with self.test_session(): + with self.cached_session(): q = data_flow_ops.ConditionalAccumulator( dtypes_lib.float32, name="Q", diff --git a/tensorflow/python/kernel_tests/regex_full_match_op_test.py b/tensorflow/python/kernel_tests/regex_full_match_op_test.py index e81f562a2a..98746e7d9b 100644 --- a/tensorflow/python/kernel_tests/regex_full_match_op_test.py +++ b/tensorflow/python/kernel_tests/regex_full_match_op_test.py @@ -42,7 +42,7 @@ class RegexFullMatchOpVariantsTest(test.TestCase, parameterized.TestCase): def testRegexFullMatchTwoDims(self, op): values = [["abaaba", "abcdabcde"], ["acdcba", "ebcda"]] - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant(values, dtypes.string) matched = op(input_tensor, "a.*a").eval() self.assertAllEqual([[True, False], [True, False]], matched) @@ -68,7 +68,7 @@ class RegexFullMatchOpTest(test.TestCase): def testRegexFullMatchDelegation(self): with 
compat.forward_compatibility_horizon(2018, 11, 1): - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant("foo", dtypes.string) pattern = "[a-z]" op = string_ops.regex_full_match(input_tensor, pattern) @@ -80,7 +80,7 @@ class RegexFullMatchOpTest(test.TestCase): def testStaticRegexFullMatchDelegation(self): with compat.forward_compatibility_horizon(2018, 11, 20): - with self.test_session(): + with self.cached_session(): input_tensor = constant_op.constant("foo", dtypes.string) pattern = "[a-z]*" op = string_ops.regex_full_match(input_tensor, pattern) diff --git a/tensorflow/python/kernel_tests/regex_replace_op_test.py b/tensorflow/python/kernel_tests/regex_replace_op_test.py index feac3a8b08..d9b7ed28d2 100644 --- a/tensorflow/python/kernel_tests/regex_replace_op_test.py +++ b/tensorflow/python/kernel_tests/regex_replace_op_test.py @@ -33,7 +33,7 @@ from tensorflow.python.platform import test class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): def testForwarding(self, op): - with self.test_session(): + with self.cached_session(): # Generate an input that is uniquely consumed by the regex op. # This exercises code paths which are optimized for this case # (e.g., using forwarding). 
@@ -47,7 +47,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): def testRemovePrefix(self, op): values = ["a:foo", "a:bar", "a:foo", "b:baz", "b:qux", "ca:b"] - with self.test_session(): + with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) stripped = op(input_vector, "^(a:|b:)", "", replace_global=False).eval() self.assertAllEqual([b"foo", b"bar", b"foo", b"baz", b"qux", b"ca:b"], @@ -55,21 +55,21 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): def testRegexReplace(self, op): values = ["aba\naba", "abcdabcde"] - with self.test_session(): + with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) stripped = op(input_vector, "a.*a", "(\\0)").eval() self.assertAllEqual([b"(aba)\n(aba)", b"(abcda)bcde"], stripped) def testEmptyMatch(self, op): values = ["abc", "1"] - with self.test_session(): + with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) stripped = op(input_vector, "", "x").eval() self.assertAllEqual([b"xaxbxcx", b"x1x"], stripped) def testInvalidPattern(self, op): values = ["abc", "1"] - with self.test_session(): + with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) invalid_pattern = "A[" replace = op(input_vector, invalid_pattern, "x") @@ -78,7 +78,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): def testGlobal(self, op): values = ["ababababab", "abcabcabc", ""] - with self.test_session(): + with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) stripped = op(input_vector, "ab", "abc", True).eval() self.assertAllEqual([b"abcabcabcabcabc", b"abccabccabcc", b""], stripped) @@ -99,7 +99,7 @@ class RegexReplaceTest(test.TestCase, parameterized.TestCase): (as_tensor, as_string), (as_tensor, as_tensor)) def testRegexReplaceDelegation(self, pattern_fn, rewrite_fn): - with self.test_session(): + with 
self.cached_session(): input_vector = constant_op.constant("foo", dtypes.string) pattern = pattern_fn("[a-z]") replace = rewrite_fn(".") @@ -107,7 +107,7 @@ class RegexReplaceTest(test.TestCase, parameterized.TestCase): self.assertTrue(op.name.startswith("RegexReplace")) def testStaticRegexReplaceDelegation(self): - with self.test_session(): + with self.cached_session(): input_vector = constant_op.constant("foo", dtypes.string) pattern = "[a-z]" replace = "." diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py index 477720302d..a824d5c826 100644 --- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py +++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py @@ -195,7 +195,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): self.assertAllEqual([-1, 2], val.dense_shape) def testAccumulatorTakeGradSum(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", shape=(), reduction_type="SUM") @@ -289,7 +289,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase): val, sess) def testParallelApplyGradSum(self): - with self.test_session() as sess: + with self.cached_session() as sess: q = data_flow_ops.SparseConditionalAccumulator( dtypes_lib.float32, name="Q", diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py index 4d163a0f6f..cd3fe14883 100644 --- a/tensorflow/python/kernel_tests/substr_op_test.py +++ b/tensorflow/python/kernel_tests/substr_op_test.py @@ -46,7 +46,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = b"ell" substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -57,7 
+57,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = b"" substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -79,7 +79,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): expected_value = [b"ell", b"orl"] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -104,7 +104,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): [b"ixte", b"even", b"ight"]] substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): substr = substr_op.eval() self.assertAllEqual(substr, expected_value) @@ -196,7 +196,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array(-7, dtype) length = np.array(3, dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -234,7 +234,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array([[1, 2, -3], [1, 2, -4], [1, 2, -3]], dtype) length = np.array([[3, 2, 1], [1, 2, 3], [2, 2, 2]], dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() @@ -252,7 +252,7 @@ class SubstrOpTest(test.TestCase, parameterized.TestCase): position = np.array([-1, -2, -4], dtype) length = np.array([1, 2, 3], dtype) substr_op = string_ops.substr(test_string, position, length) - with self.test_session(): + with self.cached_session(): with self.assertRaises(errors_impl.InvalidArgumentError): substr = substr_op.eval() -- GitLab From 
d7b4bf68dc80f1abf90bd6b857f079157028a861 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 17 Sep 2018 13:23:58 -0700 Subject: [PATCH 0278/1357] Add missing `watch` call to GradientTape documentation. PiperOrigin-RevId: 213326503 --- tensorflow/python/eager/backprop.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 11336efebb..e6cf9653a8 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -669,6 +669,7 @@ class GradientTape(object): ```python x = tf.constant(3.0) with tf.GradientTape() as g: + g.watch(x) with tf.GradientTape() as gg: gg.watch(x) y = x * x -- GitLab From a768624f1d0ae3629caf5b9784b4b6911b881c18 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 13:24:29 -0700 Subject: [PATCH 0279/1357] Move from deprecated self.test_session() to self.cached_session(). self.test_session() has been deprecated in 9962eb5e84b15e309410071b06c2ed2d6148ed44 as its name confuses readers of the test. Moving to cached_session() instead which is more explicit about: * the fact that the session may be reused. * the session is not closed even when doing a "with self.test_session()" statement. 
PiperOrigin-RevId: 213326581 --- .../python/kernel_tests/monte_carlo_test.py | 18 ++++---- .../training/functions/gbdt_batch_test.py | 42 +++++++++---------- .../python/kernel_tests/cudnn_rnn_test.py | 2 +- .../kernel_tests/batch_dataset_op_test.py | 8 ++-- .../python/kernel_tests/map_defun_op_test.py | 4 +- .../assert_next_dataset_op_test.py | 6 +-- .../map_and_filter_fusion_test.py | 2 +- .../optimization/model_dataset_op_test.py | 10 ++--- .../optimization/optimize_dataset_op_test.py | 12 +++--- .../kernel_tests/stats_dataset_ops_test.py | 16 +++---- .../contrib/deprecated/summaries_test.py | 10 ++--- .../python/framework/tensor_util_test.py | 2 +- .../hadoop/python/kernel_tests/hadoop_test.py | 2 +- .../kafka/python/kernel_tests/kafka_test.py | 2 +- .../sparse_feature_cross_op_test.py | 34 +++++++-------- .../learn/python/learn/graph_actions_test.py | 2 +- .../linear_operator_addition_test.py | 24 +++++------ .../metric_learning/metric_loss_ops_test.py | 16 +++---- .../python/kernel_tests/histogram_ops_test.py | 10 ++--- .../python/metrics/classification_test.py | 28 ++++++------- .../training/lazy_adam_optimizer_test.py | 6 +-- .../tensor_forest/client/eval_metrics_test.py | 8 ++-- 22 files changed, 132 insertions(+), 132 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py index 9e6a146f67..13215ffabf 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/monte_carlo_test.py @@ -42,7 +42,7 @@ class ExpectationImportanceSampleTest(test.TestCase): def test_normal_integral_mean_and_var_correctly_estimated(self): n = int(1e6) - with self.test_session(): + with self.cached_session(): mu_p = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64) mu_q = constant_op.constant([0.0, 0.0], dtype=dtypes.float64) sigma_p = constant_op.constant([0.5, 0.5], dtype=dtypes.float64) @@ 
-72,7 +72,7 @@ class ExpectationImportanceSampleTest(test.TestCase): # Test that importance sampling can correctly estimate the probability that # the product of components in a MultivariateNormal are > 0. n = 1000 - with self.test_session(): + with self.cached_session(): p = mvn_diag_lib.MultivariateNormalDiag( loc=[0.], scale_diag=[1.0, 1.0]) q = mvn_diag_lib.MultivariateNormalDiag( @@ -99,7 +99,7 @@ class ExpectationImportanceSampleLogspaceTest(test.TestCase): def test_normal_distribution_second_moment_estimated_correctly(self): # Test the importance sampled estimate against an analytical result. n = int(1e6) - with self.test_session(): + with self.cached_session(): mu_p = constant_op.constant([0.0, 0.0], dtype=dtypes.float64) mu_q = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64) sigma_p = constant_op.constant([1.0, 2 / 3.], dtype=dtypes.float64) @@ -127,7 +127,7 @@ class GetSamplesTest(test.TestCase): """Test the private method 'get_samples'.""" def test_raises_if_both_z_and_n_are_none(self): - with self.test_session(): + with self.cached_session(): dist = normal_lib.Normal(loc=0., scale=1.) z = None n = None @@ -136,7 +136,7 @@ class GetSamplesTest(test.TestCase): _get_samples(dist, z, n, seed) def test_raises_if_both_z_and_n_are_not_none(self): - with self.test_session(): + with self.cached_session(): dist = normal_lib.Normal(loc=0., scale=1.) z = dist.sample(seed=42) n = 1 @@ -145,7 +145,7 @@ class GetSamplesTest(test.TestCase): _get_samples(dist, z, n, seed) def test_returns_n_samples_if_n_provided(self): - with self.test_session(): + with self.cached_session(): dist = normal_lib.Normal(loc=0., scale=1.) z = None n = 10 @@ -154,7 +154,7 @@ class GetSamplesTest(test.TestCase): self.assertEqual((10,), z.get_shape()) def test_returns_z_if_z_provided(self): - with self.test_session(): + with self.cached_session(): dist = normal_lib.Normal(loc=0., scale=1.) 
z = dist.sample(10, seed=42) n = None @@ -166,7 +166,7 @@ class GetSamplesTest(test.TestCase): class ExpectationTest(test.TestCase): def test_works_correctly(self): - with self.test_session() as sess: + with self.cached_session() as sess: x = constant_op.constant([-1e6, -100, -10, -1, 1, 10, 100, 1e6]) p = normal_lib.Normal(loc=x, scale=1.) @@ -213,7 +213,7 @@ class ExpectationTest(test.TestCase): rtol=0.05, atol=0.) def test_docstring_example_normal(self): - with self.test_session() as sess: + with self.cached_session() as sess: num_draws = int(1e5) mu_p = constant_op.constant(0.) mu_q = constant_op.constant(1.) diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 73e41bc457..9d9941f696 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -86,7 +86,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testExtractFeatures(self): """Tests feature extraction.""" - with self.test_session(): + with self.cached_session(): features = {} features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32) features["sparse_float"] = sparse_tensor.SparseTensor( @@ -128,7 +128,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testExtractFeaturesWithTransformation(self): """Tests feature extraction.""" - with self.test_session(): + with self.cached_session(): features = {} features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32) features["sparse_float"] = sparse_tensor.SparseTensor( @@ -178,7 +178,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testExtractFeaturesFromCoreFeatureColumns(self): """Tests feature extraction when using core columns.""" - with self.test_session(): + with self.cached_session(): features = {} # Sparse float column does not exist in core, so only dense numeric and # categorical. 
@@ -213,7 +213,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefNoBiasCentering(self): """Tests the train function running on chief without bias centering.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -316,7 +316,7 @@ class GbdtTest(test_util.TensorFlowTestCase): self.assertProtoEquals(expected_tree, output.trees[0]) def testObliviousDecisionTreeAsWeakLearner(self): - with self.test_session(): + with self.cached_session(): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -473,7 +473,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefSparseAndDense(self): """Tests the train function with sparse and dense features.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -580,7 +580,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefScalingNumberOfExamples(self): """Tests the train function running on chief without bias centering.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -685,7 +685,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefWithBiasCentering(self): """Tests the train function running on chief with bias centering.""" - with self.test_session(): + with self.cached_session(): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = 
learner_pb2.LearnerConfig() @@ -757,7 +757,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnNonChiefNoBiasCentering(self): """Tests the train function running on worker without bias centering.""" - with self.test_session(): + with self.cached_session(): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -821,7 +821,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnNonChiefWithCentering(self): """Tests the train function running on worker with bias centering.""" - with self.test_session(): + with self.cached_session(): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -885,7 +885,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testPredictFn(self): """Tests the predict function.""" - with self.test_session() as sess: + with self.cached_session() as sess: # Create ensemble with one bias node. ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() text_format.Merge( @@ -939,7 +939,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testPredictFnWithLeafIndexAdvancedLeft(self): """Tests the predict function with output leaf ids.""" - with self.test_session() as sess: + with self.cached_session() as sess: # Create ensemble with one bias node. 
ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() text_format.Merge( @@ -1051,7 +1051,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnMulticlassFullHessian(self): """Tests the GBDT train for multiclass full hessian.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") @@ -1155,7 +1155,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnMulticlassDiagonalHessian(self): """Tests the GBDT train for multiclass diagonal hessian.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") @@ -1259,7 +1259,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnMulticlassTreePerClass(self): """Tests the GBDT train for multiclass tree per class strategy.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") @@ -1374,7 +1374,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefFeatureSelectionReachedLimitNoGoodSplit(self): """Tests the train function running on chief with feature selection.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() @@ -1493,7 +1493,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefFeatureSelectionWithGoodSplits(self): """Tests the train function running on chief with feature selection.""" - with self.test_session() as sess: + with self.cached_session() as sess: ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") 
learner_config = learner_pb2.LearnerConfig() @@ -1610,7 +1610,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testTrainFnChiefFeatureSelectionReachedLimitIncrementAttemptedLayer(self): """Tests the train function running on chief with feature selection.""" - with self.test_session() as sess: + with self.cached_session() as sess: tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() tree = tree_ensemble_config.trees.add() @@ -1720,7 +1720,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testResetModelBeforeAndAfterSplit(self): """Tests whether resetting works.""" - with self.test_session(): + with self.cached_session(): # First build a small tree and train it to verify training works. ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") @@ -1854,7 +1854,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testResetModelNonChief(self): """Tests the reset function on a non-chief worker.""" - with self.test_session(): + with self.cached_session(): # Create ensemble with one bias node. 
ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() text_format.Merge( @@ -1930,7 +1930,7 @@ class GbdtTest(test_util.TensorFlowTestCase): def testResetModelWithCenterBias(self): """Tests the reset function running on chief with bias centering.""" - with self.test_session(): + with self.cached_session(): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, tree_ensemble_config="", name="tree_ensemble") learner_config = learner_pb2.LearnerConfig() diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py index fda1b9f1b3..57793a8ff5 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py @@ -460,7 +460,7 @@ class CudnnRNNTestBasic(test_util.TensorFlowTestCase): grad, = gradients.gradients( math_ops.reduce_sum(accumulation), (original_input,)) init_op = variables.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) accumulation_eval, grad_eval = sess.run((accumulation, grad)) self.assertAllEqual([28, 100, 100], accumulation_eval.shape) diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 8e368bf2bc..e2508de9e9 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -742,7 +742,7 @@ class RestructuredDatasetTest(test.TestCase): iterator = result.make_initializable_iterator() init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) for _ in range(5): sess.run(get_next) @@ -813,7 +813,7 @@ class RestructuredDatasetTest(test.TestCase): .make_initializable_iterator()) init_op = 
iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) with self.assertRaises(errors.InvalidArgumentError): sess.run(get_next) @@ -837,7 +837,7 @@ class RestructuredDatasetTest(test.TestCase): iterator = result.make_initializable_iterator() init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) for _ in range(5): sess.run(get_next) @@ -879,7 +879,7 @@ class RestructuredDatasetTest(test.TestCase): iterator = result.make_initializable_iterator() init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) for _ in range(5): sess.run(get_next) diff --git a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py index 83b723710c..25aea0393f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/map_defun_op_test.py @@ -116,7 +116,7 @@ class MapDefunTest(test.TestCase): elems2 = array_ops.placeholder(dtypes.int32) result = map_defun.map_defun(fn, [elems1, elems2], [dtypes.int32, dtypes.int32], [(), ()]) - with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaisesWithPredicateMatch( errors.InvalidArgumentError, "All inputs must have the same dimension 0."): @@ -225,7 +225,7 @@ class MapDefunTest(test.TestCase): c = constant_op.constant([1, 2, 3, 4, 5]) map_defun_op = map_defun.map_defun(simple_fn, [c], [dtypes.int32], [()])[0] - with self.test_session() as sess: + with self.cached_session() as sess: thread = self.checkedThread( self._assert_op_cancelled, args=(sess, map_defun_op)) thread.start() diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py 
b/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py index bd7b50b902..d10da80442 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/assert_next_dataset_op_test.py @@ -31,7 +31,7 @@ class AssertNextDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: self.assertEqual(0, sess.run(get_next)) def testAssertNextInvalid(self): @@ -40,7 +40,7 @@ class AssertNextDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaisesRegexp( errors.InvalidArgumentError, "Asserted Whoops transformation at offset 0 but encountered " @@ -53,7 +53,7 @@ class AssertNextDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: with self.assertRaisesRegexp( errors.InvalidArgumentError, "Asserted next 2 transformations but encountered only 1."): diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py index dde115925e..e75edf6086 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py @@ -200,7 +200,7 @@ class MapAndFilterFusionTest(test.TestCase, parameterized.TestCase): optimization.optimize(["filter_fusion"])) iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: for x in range(5): r = map_function(x) filtered 
= False diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py index 2b3ac85924..3b62a7e468 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/model_dataset_op_test.py @@ -40,7 +40,7 @@ class ModelDatasetTest(test.TestCase): get_next = iterator.get_next() deltas = [] - with self.test_session() as sess: + with self.cached_session() as sess: for _ in range(5): sess.run(get_next.op) for _ in range(100): @@ -64,7 +64,7 @@ class ModelDatasetTest(test.TestCase): get_next = iterator.get_next() deltas = [] - with self.test_session() as sess: + with self.cached_session() as sess: for _ in range(5): sess.run(get_next.op) for _ in range(1000): @@ -92,7 +92,7 @@ class ModelDatasetTest(test.TestCase): get_next = iterator.get_next() deltas = [] - with self.test_session() as sess: + with self.cached_session() as sess: for _ in range(5): sess.run(get_next.op) for _ in range(10): @@ -119,7 +119,7 @@ class ModelDatasetTest(test.TestCase): get_next = iterator.get_next() deltas = [] - with self.test_session() as sess: + with self.cached_session() as sess: for _ in range(5): sess.run(get_next.op) for _ in range(1000): @@ -164,7 +164,7 @@ class ModelDatasetTest(test.TestCase): get_next = iterator.get_next() deltas = [] - with self.test_session() as sess: + with self.cached_session() as sess: for _ in range(5): sess.run(get_next) for _ in range(100): diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py index 909da5aee0..a3fb824ce9 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/optimize_dataset_op_test.py @@ -38,7 +38,7 
@@ class OptimizeDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -51,7 +51,7 @@ class OptimizeDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -64,7 +64,7 @@ class OptimizeDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: self.assertAllEqual([x * x for x in range(10)], sess.run(get_next)) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) @@ -76,7 +76,7 @@ class OptimizeDatasetTest(test.TestCase): iterator = dataset.make_one_shot_iterator() get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(get_next) def testOptimizationLargeInputFromTensor(self): @@ -87,7 +87,7 @@ class OptimizeDatasetTest(test.TestCase): init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op, {input_t: np.ones([512, 1024, 1025], np.int32)}) sess.run(get_next) @@ -99,7 +99,7 @@ class OptimizeDatasetTest(test.TestCase): init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op, {input_t: np.ones([1, 512, 1024, 1025], np.int32)}) sess.run(get_next) diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py 
b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py index e25570c5ad..719ce2e3fe 100644 --- a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py @@ -40,7 +40,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(iterator.initializer) expected_sum = 0.0 for i in range(100): @@ -65,7 +65,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) @@ -84,7 +84,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(iterator.initializer) for i in range(100): self.assertAllEqual( @@ -109,7 +109,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: for j in range(5): sess.run(iterator.initializer) for i in range(100): @@ -127,7 +127,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): iterator = dataset.make_initializable_iterator() next_element = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) @@ -144,7 +144,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = 
iterator.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) @@ -168,7 +168,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = iterator.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(iterator.initializer) for i in range(100): self.assertEqual(i, sess.run(next_element)) @@ -188,7 +188,7 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): next_element = iterator_0.get_next() + iterator_1.get_next() summary_t = stats_aggregator.get_summary() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run([iterator_0.initializer, iterator_1.initializer]) for i in range(100): self.assertEqual(i * 2, sess.run(next_element)) diff --git a/tensorflow/contrib/deprecated/summaries_test.py b/tensorflow/contrib/deprecated/summaries_test.py index 6acf2a6469..4038224a1c 100644 --- a/tensorflow/contrib/deprecated/summaries_test.py +++ b/tensorflow/contrib/deprecated/summaries_test.py @@ -27,31 +27,31 @@ from tensorflow.python.platform import test class DeprecatedSummariesTest(test.TestCase): def testScalarSummary(self): - with self.test_session(): + with self.cached_session(): c = constant_op.constant(3) s = logging_ops.scalar_summary('tag', c) self.assertEqual(s.op.type, u'ScalarSummary') def testHistogramSummary(self): - with self.test_session(): + with self.cached_session(): c = constant_op.constant(3) s = logging_ops.histogram_summary('tag', c) self.assertEqual(s.op.type, u'HistogramSummary') def testImageSummary(self): - with self.test_session(): + with self.cached_session(): i = array_ops.ones((5, 4, 4, 3)) s = logging_ops.image_summary('tag', i) self.assertEqual(s.op.type, u'ImageSummary') def testAudioSummary(self): - with 
self.test_session(): + with self.cached_session(): c = constant_op.constant(3.0) s = logging_ops.audio_summary('tag', c, sample_rate=8000) self.assertEqual(s.op.type, u'AudioSummaryV2') def testMergeSummary(self): - with self.test_session(): + with self.cached_session(): c = constant_op.constant(3) a = logging_ops.scalar_summary('a', c) b = logging_ops.scalar_summary('b', c) diff --git a/tensorflow/contrib/framework/python/framework/tensor_util_test.py b/tensorflow/contrib/framework/python/framework/tensor_util_test.py index b1820c10c8..9b0b9b1e1b 100644 --- a/tensorflow/contrib/framework/python/framework/tensor_util_test.py +++ b/tensorflow/contrib/framework/python/framework/tensor_util_test.py @@ -186,7 +186,7 @@ class WithShapeTest(test.TestCase): unexpected_shapes) def test_with_shape_2x2_with_partial_expected_shape(self): - with self.test_session(): + with self.cached_session(): value = [[42, 43], [44, 45]] actual_shape = [2, 2] tensor = constant_op.constant(value, shape=actual_shape) diff --git a/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py b/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py index d796e43d87..f7f1189bb9 100644 --- a/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py +++ b/tensorflow/contrib/hadoop/python/kernel_tests/hadoop_test.py @@ -51,7 +51,7 @@ class SequenceFileDatasetTest(test.TestCase): init_op = iterator.initializer get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) for _ in range(num_repeats): # Dataset is repeated. for i in range(25): # 25 records. 
diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py index 621911876f..08ebcdb544 100644 --- a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py +++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.py @@ -54,7 +54,7 @@ class KafkaDatasetTest(test.TestCase): init_batch_op = iterator.make_initializer(batch_dataset) get_next = iterator.get_next() - with self.test_session() as sess: + with self.cached_session() as sess: # Basic test: read from topic 0. sess.run(init_op, feed_dict={topics: ["test:0:0:4"], num_epochs: 1}) for i in range(5): diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py index 28ddaa69a1..155d06a08e 100644 --- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py +++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py @@ -45,7 +45,7 @@ class SparseCrossOpTest(test.TestCase): 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_dense(self): @@ -66,7 +66,7 @@ class SparseCrossOpTest(test.TestCase): 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_integer_mixed_string_sparse(self): @@ -80,7 +80,7 @@ class SparseCrossOpTest(test.TestCase): '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: 
self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_integer_mixed_string_dense(self): @@ -99,7 +99,7 @@ class SparseCrossOpTest(test.TestCase): '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2', '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_sparse_cross_dense(self): @@ -117,7 +117,7 @@ class SparseCrossOpTest(test.TestCase): 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_integer_sparse_input(self): @@ -133,7 +133,7 @@ class SparseCrossOpTest(test.TestCase): '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_permutation_3x3x3(self): @@ -176,7 +176,7 @@ class SparseCrossOpTest(test.TestCase): 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2', 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_permutation_3x1x2(self): @@ -196,7 +196,7 @@ class SparseCrossOpTest(test.TestCase): 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1', 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2' ]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_large_batch(self): @@ -229,7 +229,7 @@ class SparseCrossOpTest(test.TestCase): ]) expected_out = self._sparse_tensor(col_out) - with self.test_session() as sess: + with self.cached_session() as sess: 
self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_one_column_empty(self): @@ -242,7 +242,7 @@ class SparseCrossOpTest(test.TestCase): self._sparse_tensor([], 1), self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]) ]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_empty(sess.run(op)) def test_some_columns_empty(self): @@ -261,7 +261,7 @@ class SparseCrossOpTest(test.TestCase): 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1', 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2' ]], 2) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_all_columns_empty(self): @@ -273,7 +273,7 @@ class SparseCrossOpTest(test.TestCase): self._sparse_tensor([]), self._sparse_tensor([]), self._sparse_tensor([]) ]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_empty(sess.run(op)) def test_hashed_output_zero_bucket(self): @@ -288,7 +288,7 @@ class SparseCrossOpTest(test.TestCase): hashed_output=True) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[3735511728867393167]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_hashed_output_zero_bucket_v2(self): @@ -304,7 +304,7 @@ class SparseCrossOpTest(test.TestCase): hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[1971693436396284976]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed. 
@@ -321,7 +321,7 @@ class SparseCrossOpTest(test.TestCase): num_buckets=100) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[74]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_hashed_output_v2(self): @@ -338,7 +338,7 @@ class SparseCrossOpTest(test.TestCase): hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) # Check actual hashed output to prevent unintentional hashing changes. expected_out = self._sparse_tensor([[83]]) - with self.test_session() as sess: + with self.cached_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) def test_hashed_output_v1_has_collision(self): @@ -384,7 +384,7 @@ class SparseCrossOpTest(test.TestCase): ], hashed_output=True, num_buckets=1000) - with self.test_session() as sess: + with self.cached_session() as sess: out = sess.run(op) self.assertEqual(6, len(out.values)) self.assertAllEqual([[0, i] for i in range(6)], out.indices) diff --git a/tensorflow/contrib/learn/python/learn/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py index d5c02124ac..33180b778a 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions_test.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions_test.py @@ -234,7 +234,7 @@ class GraphActionsTest(test.TestCase): self.assertTrue(test_ops.resource_initialized_op(handle).eval()) def test_infer_different_default_graph(self): - with self.test_session(): + with self.cached_session(): self._assert_ckpt(self._output_dir, False) with ops.Graph().as_default(): in0, in1, out = self._build_inference_graph() diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py index 6a72df6dfd..d94ac73654 100644 --- 
a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py @@ -76,7 +76,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): [1., 1.], is_positive_definite=True, name="A") op_b = linalg.LinearOperatorDiag( [2., 2.], is_positive_definite=True, name="B") - with self.test_session(): + with self.cached_session(): op_sum = add_operators([op_a, op_b]) self.assertEqual(1, len(op_sum)) op = op_sum[0] @@ -98,7 +98,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): [2., 2.], is_positive_definite=True, name="op2") op3 = linalg.LinearOperatorDiag( [3., 3.], is_positive_definite=True, name="op3") - with self.test_session(): + with self.cached_session(): op_sum = add_operators([op1, op2, op3]) self.assertEqual(1, len(op_sum)) op = op_sum[0] @@ -121,7 +121,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): name="tril") op3 = linalg.LinearOperatorDiag( [3., 3.], is_non_singular=True, name="diag_b") - with self.test_session(): + with self.cached_session(): op_sum = add_operators([op1, op2, op3]) self.assertEqual(1, len(op_sum)) op = op_sum[0] @@ -143,7 +143,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): op2 = linalg.LinearOperatorLowerTriangular( [[2., 0.], [1.5, 2.]], name="tril") op3 = linalg.LinearOperatorDiag([3., 3.], name="diag_b") - with self.test_session(): + with self.cached_session(): op_sum = add_operators([op0, op1, op2, op3], operator_name="my_operator") self.assertEqual(1, len(op_sum)) op = op_sum[0] @@ -233,7 +233,7 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase): self.assertEqual(2, len(op_sum)) found_diag = False found_tril = False - with self.test_session(): + with self.cached_session(): for op in op_sum: if isinstance(op, linalg.LinearOperatorDiag): found_diag = True @@ -273,7 +273,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase): operator = self._adder.add(id1, id2, "my_operator", 
hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity)) - with self.test_session(): + with self.cached_session(): self.assertAllClose(2 * linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), operator.to_dense().eval()) @@ -291,7 +291,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase): operator = self._adder.add(id1, id2, "my_operator", hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity)) - with self.test_session(): + with self.cached_session(): self.assertAllClose(3.2 * linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), operator.to_dense().eval()) @@ -310,7 +310,7 @@ class AddAndReturnScaledIdentityTest(test.TestCase): operator = self._adder.add(id1, id2, "my_operator", hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity)) - with self.test_session(): + with self.cached_session(): self.assertAllClose(1.2 * linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), operator.to_dense().eval()) @@ -334,7 +334,7 @@ class AddAndReturnDiagTest(test.TestCase): operator = self._adder.add(id1, id2, "my_operator", hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag)) - with self.test_session(): + with self.cached_session(): self.assertAllClose(2 * linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), operator.to_dense().eval()) @@ -354,7 +354,7 @@ class AddAndReturnDiagTest(test.TestCase): operator = self._adder.add(op1, op2, "my_operator", hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag)) - with self.test_session(): + with self.cached_session(): self.assertAllClose( linalg.LinearOperatorDiag(diag1 + diag2).to_dense().eval(), operator.to_dense().eval()) @@ -379,7 +379,7 @@ class AddAndReturnTriLTest(test.TestCase): operator = self._adder.add(diag, tril, "my_operator", hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorLowerTriangular)) - with self.test_session(): + with self.cached_session(): self.assertAllClose([[11., 0.], [30., 2.]], 
operator.to_dense().eval()) self.assertTrue(operator.is_positive_definite) self.assertTrue(operator.is_non_singular) @@ -401,7 +401,7 @@ class AddAndReturnMatrixTest(test.TestCase): operator = self._adder.add(diag1, diag2, "my_operator", hints) self.assertTrue(isinstance(operator, linalg.LinearOperatorFullMatrix)) - with self.test_session(): + with self.cached_session(): self.assertAllClose([[0., 0.], [0., 5.]], operator.to_dense().eval()) self.assertFalse(operator.is_positive_definite) self.assertFalse(operator.is_non_singular) diff --git a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py index 4ec539ab42..9c389144ff 100644 --- a/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py +++ b/tensorflow/contrib/losses/python/metric_learning/metric_loss_ops_test.py @@ -61,7 +61,7 @@ def pairwise_distance_np(feature, squared=False): class ContrastiveLossTest(test.TestCase): def testContrastive(self): - with self.test_session(): + with self.cached_session(): num_data = 10 feat_dim = 6 margin = 1.0 @@ -90,7 +90,7 @@ class ContrastiveLossTest(test.TestCase): class TripletSemiHardLossTest(test.TestCase): def testTripletSemiHard(self): - with self.test_session(): + with self.cached_session(): num_data = 10 feat_dim = 6 margin = 1.0 @@ -146,7 +146,7 @@ class TripletSemiHardLossTest(test.TestCase): class LiftedStructLossTest(test.TestCase): def testLiftedStruct(self): - with self.test_session(): + with self.cached_session(): num_data = 10 feat_dim = 6 margin = 1.0 @@ -217,7 +217,7 @@ def convert_to_list_of_sparse_tensor(np_matrix): class NpairsLossTest(test.TestCase): def testNpairs(self): - with self.test_session(): + with self.cached_session(): num_data = 15 feat_dim = 6 num_classes = 5 @@ -261,7 +261,7 @@ class NpairsLossTest(test.TestCase): class NpairsLossMultiLabelTest(test.TestCase): def testNpairsMultiLabelLossWithSingleLabelEqualsNpairsLoss(self): - 
with self.test_session(): + with self.cached_session(): num_data = 15 feat_dim = 6 reg_lambda = 0.02 @@ -290,7 +290,7 @@ class NpairsLossMultiLabelTest(test.TestCase): self.assertAllClose(loss_npairs, loss_npairs_multilabel) def testNpairsMultiLabel(self): - with self.test_session(): + with self.cached_session(): num_data = 15 feat_dim = 6 num_classes = 10 @@ -527,7 +527,7 @@ class ClusterLossTest(test.TestCase): def testClusteringLossPAMOff(self): if not HAS_SKLEARN: return - with self.test_session(): + with self.cached_session(): margin_multiplier = 10.0 embeddings, labels = self._genClusters(n_samples=128, n_clusters=64) @@ -544,7 +544,7 @@ class ClusterLossTest(test.TestCase): def testClusteringLossPAMOn(self): if not HAS_SKLEARN: return - with self.test_session(): + with self.cached_session(): margin_multiplier = 10.0 embeddings, labels = self._genClusters(n_samples=128, n_clusters=64) diff --git a/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py b/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py index 1d18d6beff..bed1ecb71c 100644 --- a/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py +++ b/tensorflow/contrib/metrics/python/kernel_tests/histogram_ops_test.py @@ -31,21 +31,21 @@ class Strict1dCumsumTest(test.TestCase): """Test this private function.""" def test_empty_tensor_returns_empty(self): - with self.test_session(): + with self.cached_session(): tensor = constant_op.constant([]) result = histogram_ops._strict_1d_cumsum(tensor, 0) expected = constant_op.constant([]) np.testing.assert_array_equal(expected.eval(), result.eval()) def test_length_1_tensor_works(self): - with self.test_session(): + with self.cached_session(): tensor = constant_op.constant([3], dtype=dtypes.float32) result = histogram_ops._strict_1d_cumsum(tensor, 1) expected = constant_op.constant([3], dtype=dtypes.float32) np.testing.assert_array_equal(expected.eval(), result.eval()) def test_length_3_tensor_works(self): - with 
self.test_session(): + with self.cached_session(): tensor = constant_op.constant([1, 2, 3], dtype=dtypes.float32) result = histogram_ops._strict_1d_cumsum(tensor, 3) expected = constant_op.constant([1, 3, 6], dtype=dtypes.float32) @@ -58,7 +58,7 @@ class AUCUsingHistogramTest(test.TestCase): self.rng = np.random.RandomState(0) def test_empty_labels_and_scores_gives_nan_auc(self): - with self.test_session(): + with self.cached_session(): labels = constant_op.constant([], shape=[0], dtype=dtypes.bool) scores = constant_op.constant([], shape=[0], dtype=dtypes.float32) score_range = [0, 1.] @@ -155,7 +155,7 @@ class AUCUsingHistogramTest(test.TestCase): from synthetic data. """ score_range = [0, 1.] or score_range - with self.test_session(): + with self.cached_session(): labels = array_ops.placeholder(dtypes.bool, shape=[num_records]) scores = array_ops.placeholder(dtypes.float32, shape=[num_records]) auc, update_op = histogram_ops.auc_using_histogram( diff --git a/tensorflow/contrib/metrics/python/metrics/classification_test.py b/tensorflow/contrib/metrics/python/metrics/classification_test.py index 3d0b81c1be..d6a670f97b 100644 --- a/tensorflow/contrib/metrics/python/metrics/classification_test.py +++ b/tensorflow/contrib/metrics/python/metrics/classification_test.py @@ -34,7 +34,7 @@ from tensorflow.python.platform import test class ClassificationTest(test.TestCase): def testAccuracy1D(self): - with self.test_session() as session: + with self.cached_session() as session: pred = array_ops.placeholder(dtypes.int32, shape=[None]) labels = array_ops.placeholder(dtypes.int32, shape=[None]) acc = classification.accuracy(pred, labels) @@ -44,7 +44,7 @@ class ClassificationTest(test.TestCase): self.assertEqual(result, 0.5) def testAccuracy1DBool(self): - with self.test_session() as session: + with self.cached_session() as session: pred = array_ops.placeholder(dtypes.bool, shape=[None]) labels = array_ops.placeholder(dtypes.bool, shape=[None]) acc = 
classification.accuracy(pred, labels) @@ -54,7 +54,7 @@ class ClassificationTest(test.TestCase): self.assertEqual(result, 0.5) def testAccuracy1DInt64(self): - with self.test_session() as session: + with self.cached_session() as session: pred = array_ops.placeholder(dtypes.int64, shape=[None]) labels = array_ops.placeholder(dtypes.int64, shape=[None]) acc = classification.accuracy(pred, labels) @@ -64,7 +64,7 @@ class ClassificationTest(test.TestCase): self.assertEqual(result, 0.5) def testAccuracy1DString(self): - with self.test_session() as session: + with self.cached_session() as session: pred = array_ops.placeholder(dtypes.string, shape=[None]) labels = array_ops.placeholder(dtypes.string, shape=[None]) acc = classification.accuracy(pred, labels) @@ -87,7 +87,7 @@ class ClassificationTest(test.TestCase): classification.accuracy(pred, labels) def testAccuracy1DWeighted(self): - with self.test_session() as session: + with self.cached_session() as session: pred = array_ops.placeholder(dtypes.int32, shape=[None]) labels = array_ops.placeholder(dtypes.int32, shape=[None]) weights = array_ops.placeholder(dtypes.float32, shape=[None]) @@ -101,7 +101,7 @@ class ClassificationTest(test.TestCase): self.assertEqual(result, 0.5) def testAccuracy1DWeightedBroadcast(self): - with self.test_session() as session: + with self.cached_session() as session: pred = array_ops.placeholder(dtypes.int32, shape=[None]) labels = array_ops.placeholder(dtypes.int32, shape=[None]) weights = array_ops.placeholder(dtypes.float32, shape=[]) @@ -161,7 +161,7 @@ class F1ScoreTest(test.TestCase): (10, 3), maxval=2, dtype=dtypes.int64, seed=2) f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) # Run several updates. 
@@ -176,7 +176,7 @@ class F1ScoreTest(test.TestCase): def testAllCorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant(inputs, dtype=dtypes.float32) labels = constant_op.constant(inputs) f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3) @@ -191,7 +191,7 @@ class F1ScoreTest(test.TestCase): [1, 0, 1, 0], shape=(1, 4), dtype=dtypes.float32) labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=1) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) sess.run([f1_op]) # Threshold 0 will have around 0.5 precision and 1 recall yielding an F1 @@ -201,7 +201,7 @@ class F1ScoreTest(test.TestCase): def testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(10000, 1)) - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant(inputs, dtype=dtypes.float32) labels = constant_op.constant(1 - inputs, dtype=dtypes.float32) f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3) @@ -214,7 +214,7 @@ class F1ScoreTest(test.TestCase): self.assertAlmostEqual(2 * 0.5 * 1 / (1 + 0.5), f1.eval(), places=2) def testWeights1d(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes.float32) labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2)) @@ -228,7 +228,7 @@ class F1ScoreTest(test.TestCase): self.assertAlmostEqual(1.0, f1.eval(), places=5) def testWeights2d(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = constant_op.constant( [[1, 0], [1, 0]], shape=(2, 2), dtype=dtypes.float32) labels = constant_op.constant([[0, 1], [1, 0]], shape=(2, 2)) @@ -242,7 +242,7 @@ class 
F1ScoreTest(test.TestCase): self.assertAlmostEqual(1.0, f1.eval(), places=5) def testZeroLabelsPredictions(self): - with self.test_session() as sess: + with self.cached_session() as sess: predictions = array_ops.zeros([4], dtype=dtypes.float32) labels = array_ops.zeros([4]) f1, f1_op = classification.f1_score(predictions, labels, num_thresholds=3) @@ -300,7 +300,7 @@ class F1ScoreTest(test.TestCase): f1, f1_op = classification.f1_score(tf_labels, tf_predictions, num_thresholds=3) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.local_variables_initializer()) for _ in range(num_batches): sess.run([f1_op]) diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py index f08ffaa36f..089ecf597d 100644 --- a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py @@ -236,7 +236,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase): opt.get_slot(var=var0, name="m").name) def testBasic(self): - with self.test_session(): + with self.cached_session(): self.doTestBasic(use_resource=False) @test_util.run_in_graph_and_eager_modes(reset_test=True) @@ -249,7 +249,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase): def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): + with self.cached_session(): # Initialize variables for numpy implementation. m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -286,7 +286,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase): def testSharing(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): + with self.cached_session(): # Initialize variables for numpy implementation. 
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py b/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py index aa30919167..d49928e3f1 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics_test.py @@ -32,7 +32,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase): [0.9, 0.8, 0.2], [0.6, 0.4, 0.8]]) targets = constant_op.constant([[0], [2], [1], [1]]) in_top_2_op, update_op = top_2_fn(probabilities, targets) - with self.test_session(): + with self.cached_session(): # initializes internal accuracy vars variables.local_variables_initializer().run() # need to call in order to run the in_top_2_op internal operations because @@ -49,7 +49,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase): [0.3, 0.6, 0.9, 0.4, 0.8, 0.6]]) targets = constant_op.constant([3, 0, 2, 5, 1]) in_top_3_op, update_op = top_3_fn(probabilities, targets) - with self.test_session(): + with self.cached_session(): # initializes internal accuracy vars variables.local_variables_initializer().run() # need to call in order to run the in_top_3_op internal operations because @@ -61,7 +61,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase): predictions = constant_op.constant([0, 1, 3, 6, 5, 2, 7, 6, 4, 9]) targets = constant_op.constant([0, 1, 4, 6, 5, 1, 7, 5, 4, 8]) accuracy_op, update_op = eval_metrics._accuracy(predictions, targets) - with self.test_session(): + with self.cached_session(): variables.local_variables_initializer().run() # need to call in order to run the accuracy_op internal operations because # it is a streaming function @@ -74,7 +74,7 @@ class EvalMetricsTest(test_util.TensorFlowTestCase): targets = constant_op.constant( [1.0, 4.3, 2.6, 0.5, 1.1, 0.7, 5.1, 3.4, 1.8]) r2_op, update_op = eval_metrics._r2(scores, targets) - with self.test_session(): + with 
self.cached_session(): # initializes internal accuracy vars variables.local_variables_initializer().run() # need to call in order to run the r2_op internal operations because -- GitLab From 32ed8d488ad8088b63f046cde0c665e3b2aab8e7 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 17 Sep 2018 13:31:12 -0700 Subject: [PATCH 0280/1357] Add support for predicting models with learning_phase. PiperOrigin-RevId: 213327633 --- .../contrib/tpu/python/tpu/keras_support.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py index d8c3872363..776b9bff0f 100644 --- a/tensorflow/contrib/tpu/python/tpu/keras_support.py +++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py @@ -970,15 +970,25 @@ class TPUFunction(object): # Note: this condition is possible during the prologue or epilogue of the # pipelined loop. return None, None - # Strip sample weight from inputs + + if (self.model.uses_learning_phase and + not isinstance(K.learning_phase(), int)): + # Remove the learning_phase flag at the end. We currently hard code the + # learning_phase in TPUFunction. + assert isinstance(inputs[-1], int), ( + 'Expect the final element be learning_phase flag. Got {}'.format( + inputs[-1])) + inputs = inputs[:-1] + if (self.execution_mode == model_fn_lib.ModeKeys.TRAIN or self.execution_mode == model_fn_lib.ModeKeys.EVAL): + # Strip sample weight from inputs. input_tensors = self.model._feed_inputs + self.model._feed_targets - inputs = inputs[:len(input_tensors)] - return input_tensors, inputs else: input_tensors = self.model._feed_inputs - return input_tensors, inputs + + inputs = inputs[:len(input_tensors)] + return input_tensors, inputs def _process_outputs(self, outfeed_outputs): """Processes the outputs of a model function execution. -- GitLab From cd767b617ab00ffba993d62e4ff1f2028791fe4e Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 17 Sep 2018 13:31:40 -0700 Subject: [PATCH 0281/1357] Compute `axes` and `free` statically during graph creation. PiperOrigin-RevId: 213327709 --- .../kernel_tests/attention_wrapper_test.py | 39 +++++++++---------- tensorflow/python/ops/math_ops.py | 20 +++++----- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index f2c43f30d4..1f3b533de9 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -919,31 +919,28 @@ class AttentionWrapperTest(test.TestCase): wrapper.BahdanauAttention, wrapper.LuongAttention) expected_final_output = BasicDecoderOutput( - rnn_output=ResultSummary(shape=(5, 3, 20), - dtype=dtype('float32'), - mean=0.11723966), - sample_id=ResultSummary(shape=(5, 3), - dtype=dtype('int32'), - mean=9.2666666666666675)) + rnn_output=ResultSummary( + shape=(5, 3, 20), dtype=dtype('float32'), mean=0.11723966), + sample_id=ResultSummary( + shape=(5, 3), dtype=dtype('int32'), mean=7.266666666666667)) expected_final_state = AttentionWrapperState( cell_state=LSTMStateTuple( - c=ResultSummary(shape=(5, 9), - dtype=dtype('float32'), - mean=-0.003545674), - h=ResultSummary(shape=(5, 9), - dtype=dtype('float32'), - mean=-0.0018327223)), - attention=ResultSummary(shape=(5, 20), - dtype=dtype('float32'), - mean=0.11728073), + c=ResultSummary( + shape=(5, 9), dtype=dtype('float32'), mean=-0.003545674), + h=ResultSummary( + shape=(5, 9), dtype=dtype('float32'), mean=-0.0018327223)), + attention=ResultSummary( + shape=(5, 20), dtype=dtype('float32'), mean=0.11601614207), time=3, - alignments=( - ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), - ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125)), + alignments=(ResultSummary( + shape=(5, 8), 
dtype=dtype('float32'), mean=0.125), + ResultSummary( + shape=(5, 8), dtype=dtype('float32'), mean=0.125)), alignment_history=(), - attention_state=( - ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125), - ResultSummary(shape=(5, 8), dtype=dtype('float32'), mean=0.125))) + attention_state=(ResultSummary( + shape=(5, 8), dtype=dtype('float32'), mean=0.125), + ResultSummary( + shape=(5, 8), dtype=dtype('float32'), mean=0.125))) expected_final_alignment_history = ( ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125), ResultSummary(shape=(3, 5, 8), dtype=dtype('float32'), mean=0.125)) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index acd5a32e82..f57abf6704 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2898,21 +2898,23 @@ def tensordot(a, b, axes, name=None): shape_a = a.get_shape().as_list() axes = [i if i >= 0 else i + len(shape_a) for i in axes] free = [i for i in xrange(len(shape_a)) if i not in axes] - free_dims_static = [shape_a[i] for i in free] + axes_dims = [shape_a[i] for i in axes] + free_dims = [shape_a[i] for i in free] + free_dims_static = free_dims + axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") + free = ops.convert_to_tensor(free, dtype=dtypes.int32, name="free") + shape_a = array_ops.shape(a) else: free_dims_static = None - shape_a = array_ops.shape(a) - rank_a = array_ops.rank(a) - axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") - axes = cast(axes >= 0, dtypes.int32) * axes + cast( - axes < 0, dtypes.int32) * ( - axes + rank_a) - free, _ = array_ops.setdiff1d(range(rank_a), axes) + shape_a = array_ops.shape(a) + rank_a = array_ops.rank(a) + axes = ops.convert_to_tensor(axes, dtype=dtypes.int32, name="axes") + axes = array_ops.where(axes >= 0, axes, axes + rank_a) + free, _ = array_ops.setdiff1d(range(rank_a), axes) free_dims = array_ops.gather(shape_a, free) axes_dims = array_ops.gather(shape_a, axes) 
prod_free_dims = reduce_prod(free_dims) prod_axes_dims = reduce_prod(axes_dims) - perm = array_ops.concat([axes_dims, free_dims], 0) if flipped: perm = array_ops.concat([axes, free], 0) new_shape = array_ops.stack([prod_axes_dims, prod_free_dims]) -- GitLab From 838d9c859583717a151395ef9c28374e124f1408 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 13:32:33 -0700 Subject: [PATCH 0282/1357] Tweak test tolerance in segment_reduction_ops_test.py, which is otherwise flaky. PiperOrigin-RevId: 213327863 --- tensorflow/python/kernel_tests/segment_reduction_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index ce507e4ad7..2931877c11 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -300,7 +300,7 @@ class UnsortedSegmentTest(SegmentReductionHelper): tf_ans = s.eval() if dtype is dtypes_lib.bfloat16: tf_ans = tf_ans.astype(np.float32) - self.assertAllClose(np_ans, tf_ans) + self.assertAllCloseAccordingToType(np_ans, tf_ans) self.assertShapeEqual(np_ans, s) def testNumSegmentsTypes(self): -- GitLab From 6add0fb2481756b276b1016033919c1c237abee1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 14:08:32 -0700 Subject: [PATCH 0283/1357] Improve the error messages in custom_export_strategy. 
PiperOrigin-RevId: 213334465 --- .../boosted_trees/estimator_batch/custom_export_strategy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index 78232fa0a6..48f12a64f9 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -51,6 +51,7 @@ def make_custom_export_strategy(name, feature_columns: A list of feature columns. export_input_fn: A function that takes no arguments and returns an `InputFnOps`. + use_core_columns: A boolean, whether core feature columns were used. Returns: An `ExportStrategy`. @@ -196,7 +197,7 @@ def convert_to_universal_format(dtec, sorted_feature_names, matching_id.int64_value = split.feature_id node.custom_left_child_test.Pack(categorical_test) else: - raise ValueError("Unexpected node type %s", node_type) + raise ValueError("Unexpected node type %s" % node_type) node.left_child_id.value = split.left_id node.right_child_id.value = split.right_id return model_and_features @@ -236,7 +237,7 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats, assert tree_node.node_metadata.gain == 0 continue else: - raise ValueError("Unexpected split type %s", node_type) + raise ValueError("Unexpected split type %s" % node_type) # Apply shrinkage factor. It is important since it is not always uniform # across different trees. sums[split_column] += ( -- GitLab From 4516558acc9763999b19d1af75ab1fcd6562e4f0 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 17 Sep 2018 14:20:13 -0700 Subject: [PATCH 0284/1357] Use a single thread in eager if inter_op_parallelism_threads isn't specified. 
PiperOrigin-RevId: 213336463 --- tensorflow/core/common_runtime/eager/context.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index 263467a5b6..18420b60fd 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -32,6 +32,18 @@ bool ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val) { return default_val; } +std::unique_ptr EagerThreadPool( + const SessionOptions& opts) { + SessionOptions opts_copy(opts); + if (opts_copy.config.inter_op_parallelism_threads() == 0) { + // Eager defaults to a single thread when no threads are specified. + opts_copy.config.set_inter_op_parallelism_threads(1); + } + + return std::unique_ptr( + NewThreadPoolFromSessionOptions(opts_copy)); +} + } // namespace EagerContext::EagerContext(const SessionOptions& opts, @@ -49,7 +61,7 @@ EagerContext::EagerContext(const SessionOptions& opts, : policy_(default_policy), devices_(device_mgr->ListDevices()), rendezvous_(rendezvous), - thread_pool_(NewThreadPoolFromSessionOptions(opts)), + thread_pool_(EagerThreadPool(opts)), pflr_(new ProcessFunctionLibraryRuntime( device_mgr, opts.env, TF_GRAPH_DEF_VERSION, &func_lib_def_, {}, thread_pool_.get())), @@ -67,7 +79,7 @@ EagerContext::EagerContext(const SessionOptions& opts, } InitDeviceMapAndAsync(); runner_ = [this](std::function closure) { - this->thread_pool_->Schedule(closure); + this->thread_pool_->Schedule(std::move(closure)); }; } -- GitLab From 28dd4d9fcbf8cac1008b2ccd2b4be3fa3c25afd1 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 17 Sep 2018 14:24:17 -0700 Subject: [PATCH 0285/1357] Keep only weak references to variables in graph functions This enables cleanup of the variables referenced in defunned methods of objects when the object is garbage collected. 
Since one PolymorphicFunction is created per @defun, decorated methods before this change held on to all of the variables referenced in that method for any instance of the class (i.e. variables which should have been object-scoped were scoped to the lifetime of the class definition). Raises an exception if variables used in the function have been deleted when it is called, which means no local variables. PiperOrigin-RevId: 213337256 --- .../python/mirrored_strategy_multigpu_test.py | 12 ++++- tensorflow/python/eager/function.py | 53 ++++++++++++------ tensorflow/python/eager/function_test.py | 54 ++++++++++++------- tensorflow/python/framework/ops_test.py | 12 ++--- tensorflow/python/keras/backend.py | 4 +- .../python/training/gradient_descent_test.py | 10 ++-- 6 files changed, 95 insertions(+), 50 deletions(-) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index c6894e9013..f51e543624 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -1271,7 +1271,17 @@ class MirroredStrategyDefunTest(test.TestCase): self.evaluate(device_result)) for defun in defuns: - self.assertEqual(set(mock_model.variables), set(defun.variables)) + # PolymorphicFunctions are specialized to the current device stack, so + # call_for_each has one trace per device. To check that the expected set + # of variables was accessed on each trace, we first retrieve each + # device-specific graph function. 
+ per_device_graph_functions = dist.call_for_each_tower( + defun.get_concrete_function, + mock_model, *inputs, run_concurrently=False) + for device in devices: + graph_function = per_device_graph_functions.get(device=device) + self.assertEqual(set(mock_model.variables), + set(graph_function.graph.variables)) @test_util.run_in_graph_and_eager_modes() def testVariableInDefun(self): diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index e2874e25b6..4f1a85a274 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -23,6 +23,7 @@ import collections import functools import sys import threading +import weakref import numpy as np import six @@ -180,7 +181,7 @@ class FuncGraph(ops.Graph): self.inputs = [] self.outputs = [] self.structured_outputs = None - self.variables = [] + self._weak_variables = [] self.outer_graph = ops.get_default_graph() self.captures = collections.OrderedDict() @@ -217,6 +218,31 @@ class FuncGraph(ops.Graph): self._graph_key = graph._graph_key # pylint: enable=protected-access + @property + def variables(self): + """A list of variables accessed by this FuncGraph. + + Note that functions keep only weak references to variables. Calling the + function after a variable it accesses has been deleted is an error. + + Yields: + Strong references to variables accessed by this FuncGraph. + """ + for weak_v in self._weak_variables: + v = weak_v() + if v is None: + raise AssertionError( + "Called a function referencing variables which have been deleted. " + "This likely means that function-local variables were created and " + "not referenced elsewhere in the program. 
This is generally a " + "mistake; consider storing variables in an object attribute on " + "first call.") + yield v + + @variables.setter + def variables(self, var_list): + self._weak_variables = [weakref.ref(v) for v in var_list] + def create_op( self, op_type, @@ -603,11 +629,6 @@ class Function(object): """Returns the graph from which this function was constructed.""" return self._func_graph - @property - def variables(self): - """Returns all variables touched by this function.""" - return self._func_graph.variables - @property def inputs(self): """Returns tensors in `self.graph` corresponding to arguments.""" @@ -970,7 +991,16 @@ def _encode_arg(arg): return tuple( (_encode_arg(key), _encode_arg(arg[key])) for key in sorted(arg)) else: - return arg + try: + # If possible, keep only a weak reference to Python objects. Weak + # references hash to the same value as the original object. + # TODO(allenl): Clean up dead functions and their cache keys if the cache + # gets large. Right now creating objects with a defunned method, calling + # the method, and losing a reference to the object in a loop will leak + # memory here. + return weakref.ref(arg) + except TypeError: + return arg def _deterministic_dict_values(dictionary): @@ -1020,7 +1050,6 @@ class PolymorphicFunction(object): self._kwds_to_include = {} self._name = name self._function_cache = collections.OrderedDict() - self._variables = [] self._function_attributes = attributes or {} self._lock = threading.Lock() @@ -1066,12 +1095,6 @@ class PolymorphicFunction(object): """Returns the wrapped Python function.""" return self._python_function - # TODO(akshayka): Remove this property. - @property - def variables(self): - """Returns the union of all variables referenced by cached `Function`s`.""" - return self._variables - def get_concrete_function(self, *args, **kwargs): """Returns a `Function` object specialized to inputs and execution context. 
@@ -1238,8 +1261,6 @@ class PolymorphicFunction(object): func_graph_from_py_func(self._name, self._python_function, args, kwds, self._input_signature), self._function_attributes) - self._variables.extend( - [v for v in graph_function.variables if v not in self._variables]) self._function_cache[cache_key] = graph_function return graph_function, [ t for t in nest.flatten((args, kwds)) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index c168b6060c..6326a5b45f 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -21,6 +21,7 @@ import collections import functools from multiprocessing.pool import ThreadPool import sys +import weakref import numpy @@ -74,6 +75,13 @@ class MiniModel(keras_training.Model): return self.fc(inputs) +class DefunnedMiniModel(MiniModel): + + @function.defun + def call(self, inputs, training=True): + return super(DefunnedMiniModel, self).call(inputs, training=training) + + @test_util.with_c_shapes class FunctionTest(test.TestCase): @@ -140,8 +148,8 @@ class FunctionTest(test.TestCase): @function.defun def f(): - v = resource_variable_ops.ResourceVariable(1.0) - return v.read_value() + self.v = resource_variable_ops.ResourceVariable(1.0) + return self.v.read_value() self.assertAllEqual(f(), 1.0) @@ -399,9 +407,9 @@ class FunctionTest(test.TestCase): @function.defun def tensor_init(): - v = resource_variable_ops.ResourceVariable( + self.v = resource_variable_ops.ResourceVariable( lambda: constant_op.constant(2.0)) - return v.read_value() + return self.v.read_value() value = tensor_init() if not context.executing_eagerly(): @@ -415,8 +423,8 @@ class FunctionTest(test.TestCase): def tensor_init(): with ops.init_scope(): const = constant_op.constant(2.0) - v = resource_variable_ops.ResourceVariable(const) - return v.read_value() + self.v = resource_variable_ops.ResourceVariable(const) + return self.v.read_value() value = tensor_init() if not 
context.executing_eagerly(): @@ -478,13 +486,14 @@ class FunctionTest(test.TestCase): def testDefunForcesResourceVariables(self): def variable_creator(): - return variables.Variable(0.0).read_value() + self.v = variables.Variable(0.0) + return self.v.read_value() + self.v = None defined = function.defun(variable_creator) defined() # Create the variable. - self.assertEqual(len(defined.variables), 1) self.assertIsInstance( - defined.variables[0], resource_variable_ops.ResourceVariable) + self.v, resource_variable_ops.ResourceVariable) def testDefunDifferentiable(self): v = resource_variable_ops.ResourceVariable(1.0) @@ -1184,13 +1193,11 @@ class FunctionTest(test.TestCase): defined = function.defun(foo) x = constant_op.constant([1.0]) - self.assertAllEqual(defined.variables, []) - _ = defined(x) - self.assertAllEqual(defined.variables, [v]) + self.assertEqual(1., self.evaluate(defined(x))) + v.assign(2.) x = constant_op.constant([1.0, 2.0]) - _ = defined(x) # ensure the variables list remains the same - self.assertAllEqual(defined.variables, [v]) + self.assertAllEqual([2., 4.], self.evaluate(defined(x))) def testPythonFunctionWithDefaultArgs(self): @@ -1913,10 +1920,10 @@ class AutomaticControlDependenciesTest(test.TestCase): @function.defun def train(): - v = resource_variable_ops.ResourceVariable(1.0) - grad = backprop.implicit_grad(loss)(v) + self.v = resource_variable_ops.ResourceVariable(1.0) + grad = backprop.implicit_grad(loss)(self.v) optimizer.apply_gradients(grad) - return v.read_value() + return self.v.read_value() value = train() self.assertEqual(value.numpy(), -1.0) @@ -1943,10 +1950,10 @@ class AutomaticControlDependenciesTest(test.TestCase): @function.defun def train(): - v = resource_variable_ops.ResourceVariable(1.0) - grad = backprop.implicit_grad(loss)(v) + self.v = resource_variable_ops.ResourceVariable(1.0) + grad = backprop.implicit_grad(loss)(self.v) optimizer.apply_gradients(grad) - return v.read_value() + return self.v.read_value() train() @@ 
-2133,6 +2140,13 @@ class AutomaticControlDependenciesTest(test.TestCase): modify_same_flat(nested_input) + def testDecoratedMethodVariableCleanup(self): + m = DefunnedMiniModel() + m(array_ops.ones([1, 2])) + weak_variables = weakref.WeakSet(m.variables) + self.assertEqual(2, len(weak_variables)) + del m + self.assertEqual([], list(weak_variables)) if __name__ == '__main__': ops.enable_eager_execution( diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index d59adf3d48..c3a3437743 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2142,8 +2142,8 @@ class InitScopeTest(test_util.TensorFlowTestCase): def function_with_variables(): with ops.init_scope(): - v = resource_variable_ops.ResourceVariable(3) - return v.assign_add(1) + self.v = resource_variable_ops.ResourceVariable(3) + return self.v.assign_add(1) with context.eager_mode(): # Each invocation of function_with_variables recreates a variable. @@ -2188,13 +2188,13 @@ class InitScopeTest(test_util.TensorFlowTestCase): def inner_function(): with ops.init_scope(): - v = resource_variable_ops.ResourceVariable(1) - return v.assign_add(2) + self.v = resource_variable_ops.ResourceVariable(1) + return self.v.assign_add(2) def outer_function(inner=None): with ops.init_scope(): - v0 = resource_variable_ops.ResourceVariable(0) - return v0.assign_add(1) + inner() + self.v0 = resource_variable_ops.ResourceVariable(0) + return self.v0.assign_add(1) + inner() with context.eager_mode(): # Each invocation of outer_function recreates variables. 
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 5e1722ba20..60ed8e8c8a 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -696,14 +696,14 @@ def track_variable(v): return graph = v.graph if hasattr(v, 'graph') else ops.get_default_graph() if graph not in _GRAPH_VARIABLES: - _GRAPH_VARIABLES[graph] = set() + _GRAPH_VARIABLES[graph] = weakref.WeakSet() _GRAPH_VARIABLES[graph].add(v) def _get_variables(graph=None): """Returns variables corresponding to the given graph for initialization.""" assert not context.executing_eagerly() - variables = _GRAPH_VARIABLES.get(graph, set()) + variables = _GRAPH_VARIABLES.setdefault(graph, weakref.WeakSet()) for opt in _GRAPH_TF_OPTIMIZERS.get(graph, set()): variables.update(opt.optimizer.variables()) return variables diff --git a/tensorflow/python/training/gradient_descent_test.py b/tensorflow/python/training/gradient_descent_test.py index 56d82a5b88..1ddea598e5 100644 --- a/tensorflow/python/training/gradient_descent_test.py +++ b/tensorflow/python/training/gradient_descent_test.py @@ -252,12 +252,12 @@ class GradientDescentOptimizerTest(test.TestCase): optimizer = gradient_descent.GradientDescentOptimizer(1.0) def step(): - v = resource_variable_ops.ResourceVariable(1.0) + self.v = resource_variable_ops.ResourceVariable(1.0) with backprop.GradientTape() as tape: - loss = v ** 2 - grad = tape.gradient(loss, v) - optimizer.apply_gradients([(grad, v)]) - return v.read_value() + loss = self.v ** 2 + grad = tape.gradient(loss, self.v) + optimizer.apply_gradients([(grad, self.v)]) + return self.v.read_value() compiled_step = function.defun(step) -- GitLab From 55581a5bed7108c2d39ab603db8c916b6d624648 Mon Sep 17 00:00:00 2001 From: Eddie Zhou Date: Mon, 17 Sep 2018 15:06:34 -0700 Subject: [PATCH 0286/1357] Fix testing bug where partitioned primals wasn't actually being tested (constructing Variable directly instead of get_variable under scope with 
partitioner). PiperOrigin-RevId: 213345447 --- .../python/kernel_tests/sdca_ops_test.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 1d2db1cec8..7a1914d41f 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -134,7 +134,7 @@ def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero): return examples_dict, variables_dict -def make_variable_dict(max_age, max_gender, partitioned=False): +def make_variable_dict(max_age, max_gender, num_shards=None, partitioned=False): # TODO(sibyl-toe9oF2e): Figure out how to derive max_age & max_gender from # examples_dict. partitioner = None @@ -142,14 +142,15 @@ def make_variable_dict(max_age, max_gender, partitioned=False): partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2, axis=0) with variable_scope.variable_scope( - name_or_scope='variables', + name_or_scope=('variables/shard_{}'.format(num_shards) + if num_shards else 'variables'), partitioner=partitioner): - age_weights = variables_lib.Variable( - array_ops.zeros( - [max_age + 1], dtype=dtypes.float32)) - gender_weights = variables_lib.Variable( - array_ops.zeros( - [max_gender + 1], dtype=dtypes.float32)) + age_weights = variable_scope.get_variable( + name='age', + initializer=array_ops.zeros([max_age + 1], dtype=dtypes.float32)) + gender_weights = variable_scope.get_variable( + name='gender', + initializer=array_ops.zeros([max_gender + 1], dtype=dtypes.float32)) return dict( sparse_features_weights=[age_weights, gender_weights], dense_features_weights=[]) @@ -242,7 +243,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = 
make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) + variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, @@ -290,7 +291,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1, partitioned=True) + variables = make_variable_dict(1, 1, num_shards, partitioned=True) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, @@ -463,7 +464,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) + variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=0, symmetric_l1_regularization=0, @@ -521,7 +522,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): with self._single_threaded_test_session(): # Only use examples 0 and 2 examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) + variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, @@ -561,7 +562,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) + variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, @@ -598,7 +599,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(3, 1) + 
variables = make_variable_dict(3, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, @@ -639,7 +640,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) + variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, @@ -679,7 +680,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for num_shards in _SHARD_NUMBERS: with self._single_threaded_test_session(): examples = make_example_dict(example_protos, example_weights) - variables = make_variable_dict(1, 1) + variables = make_variable_dict(1, 1, num_shards) options = dict( symmetric_l2_regularization=1, symmetric_l1_regularization=0, -- GitLab From bb30dfce198341b2ec80d0aa22b49eaa5eac533b Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 17 Sep 2018 15:11:22 -0700 Subject: [PATCH 0287/1357] Add benchmarks comparing Mkl vs Default Conv2D ops. 
PiperOrigin-RevId: 213346439 --- tensorflow/core/kernels/BUILD | 21 + tensorflow/core/kernels/mkl_conv_ops_test.cc | 407 +++++++++++++++++++ tensorflow/tensorflow.bzl | 2 + 3 files changed, 430 insertions(+) create mode 100644 tensorflow/core/kernels/mkl_conv_ops_test.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 94d3ab4467..ef176a7de6 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -30,6 +30,7 @@ load( "//tensorflow:tensorflow.bzl", "if_android", "tf_cc_test", + "tf_cc_test_mkl", "tf_cc_tests", "tf_cc_binary", "tf_copts", @@ -6228,6 +6229,26 @@ tf_mkl_kernel_library( ] + mkl_deps(), ) +tf_cc_test_mkl( + name = "mkl_conv_ops_test", + size = "small", + srcs = ["mkl_conv_ops_test.cc"], + deps = [ + ":ops_testutil", + ":ops_util", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_mkl_kernel_library( name = "mkl_tfconv_op", prefix = "mkl_tfconv", diff --git a/tensorflow/core/kernels/mkl_conv_ops_test.cc b/tensorflow/core/kernels/mkl_conv_ops_test.cc new file mode 100644 index 0000000000..a055351337 --- /dev/null +++ b/tensorflow/core/kernels/mkl_conv_ops_test.cc @@ -0,0 +1,407 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/public/session.h" + +#if defined(INTEL_MKL_DNN_ONLY) +#include "third_party/intel_mkl_dnn/include/mkldnn.h" +#include "tensorflow/core/util/mkl_util.h" +#endif + +// TODO(ezhulenev): Add numerical tests that will compare results of default +// (aka Eigen) convolutions with MKL convolutions. + +// -------------------------------------------------------------------------- // +// Performance Benchmarks. // +// -------------------------------------------------------------------------- // + +// Compare performance of default Tensorflow convolution kernels (Eigen) with +// MKL kernels on CPU. 
+ +// Before running these benchmarks configure OpenMP environment variables: +// export KMP_BLOCKTIME=0 +// export OMP_NUM_THREADS=${num_threads} + +namespace tensorflow { + +struct Conv2DDimensions { + Conv2DDimensions(int n, int h, int w, int c, int fc, int fh, int fw) + : input_batches(n), + input_height(h), + input_width(w), + input_depth(c), + filter_count(fc), + filter_height(fh), + filter_width(fw) {} + + int input_batches; + int input_height; + int input_width; + int input_depth; + int filter_count; + int filter_height; + int filter_width; +}; + +static Tensor GetRandomTensor(const TensorShape& shape) { + Tensor tensor(DT_FLOAT, TensorShape(shape)); + tensor.flat() = tensor.flat().setRandom(); + return tensor; +} + +// Get a random Tensor for the Conv2D input. +static Tensor GetRandomInputTensor(const Conv2DDimensions& dims) { + return GetRandomTensor({dims.input_batches, dims.input_height, + dims.input_width, dims.input_depth}); +} + +// Get a random Tensor for the Conv2D filter. +static Tensor GetRandomFilterTensor(const Conv2DDimensions& dims) { + return GetRandomTensor({dims.filter_height, dims.filter_width, + dims.input_depth, dims.filter_count}); +} + +// Get a random Tensor for the Conv2D output (assuming SAME padding). +static Tensor GetRandomOutputTensor(const Conv2DDimensions& dims) { + return GetRandomTensor({dims.input_batches, dims.input_height, + dims.input_width, dims.filter_count}); +} + +// Get a Tensor encoding Conv2D input shape. +static Tensor GetInputSizesTensor(const Conv2DDimensions& dims) { + return test::AsTensor({dims.input_batches, dims.input_height, + dims.input_width, dims.input_depth}); +} + +// Get a Tensor encoding Conv2D filter shape. 
+static Tensor GetFilterSizesTensor(const Conv2DDimensions& dims) { + return test::AsTensor({dims.filter_height, dims.filter_width, + dims.input_depth, dims.filter_count}); +} + +#if defined(INTEL_MKL_DNN_ONLY) +static Tensor NonMklTensor() { + MklDnnShape non_mkl_shape; + non_mkl_shape.SetMklTensor(false); + + auto size = static_cast(non_mkl_shape.GetSerializeBufferSize()); + Tensor tensor(DT_UINT8, {size}); + + non_mkl_shape.SerializeMklDnnShape(tensor.flat().data(), + size * sizeof(uint8)); + return tensor; +} +#endif + +static Graph* DefaultConv2D(const Conv2DDimensions& dims) { + auto* graph = new Graph(OpRegistry::Global()); + + Tensor input_t = GetRandomInputTensor(dims); + Tensor filter_t = GetRandomFilterTensor(dims); + + Node* input = test::graph::Constant(graph, input_t, "input"); + Node* filter = test::graph::Constant(graph, filter_t, "filter"); + + Node* conv2d; + TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d"), "Conv2D") + .Input(input) + .Input(filter) + .Attr("T", DT_FLOAT) + .Attr("strides", {1, 1, 1, 1}) + .Attr("padding", "SAME") + .Finalize(graph, &conv2d)); + + return graph; +} + +#if defined(INTEL_MKL_DNN_ONLY) +static Graph* MklConv2D(const Conv2DDimensions& dims) { + auto* graph = new Graph(OpRegistry::Global()); + + Tensor input_t = GetRandomInputTensor(dims); + Tensor filter_t = GetRandomFilterTensor(dims); + + Node* input = test::graph::Constant(graph, input_t, "input"); + Node* filter = test::graph::Constant(graph, filter_t, "filter"); + + Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl"); + + Node* conv2d; + TF_CHECK_OK(NodeBuilder(graph->NewName("mkl_conv_2d"), "_MklConv2D") + .Input(input) + .Input(filter) + .Input(not_mkl_shape) + .Input(not_mkl_shape) + .Attr("T", DT_FLOAT) + .Attr("strides", {1, 1, 1, 1}) + .Attr("padding", "SAME") + .Attr("_kernel", "MklOp") + .Finalize(graph, &conv2d)); + + return graph; +} +#endif + +static Graph* DefaultConv2DBwdInput(const Conv2DDimensions& dims) { + auto* graph = 
new Graph(OpRegistry::Global()); + + Tensor input_sizes_t = GetInputSizesTensor(dims); + Tensor filter_t = GetRandomFilterTensor(dims); + Tensor out_backprop_t = GetRandomOutputTensor(dims); // assuming SAME padding + + Node* input_sizes = + test::graph::Constant(graph, input_sizes_t, "input_sizes"); + Node* filter = test::graph::Constant(graph, filter_t, "filter"); + Node* out_backprop = + test::graph::Constant(graph, out_backprop_t, "out_backprop"); + + Node* conv2d_bwd_input; + TF_CHECK_OK( + NodeBuilder(graph->NewName("conv_2d_bwd_input"), "Conv2DBackpropInput") + .Input(input_sizes) + .Input(filter) + .Input(out_backprop) + .Attr("T", DT_FLOAT) + .Attr("strides", {1, 1, 1, 1}) + .Attr("padding", "SAME") + .Finalize(graph, &conv2d_bwd_input)); + + return graph; +} + +#if defined(INTEL_MKL_DNN_ONLY) +static Graph* MklConv2DBwdInput(const Conv2DDimensions& dims) { + auto* graph = new Graph(OpRegistry::Global()); + + Tensor input_sizes_t = GetInputSizesTensor(dims); + Tensor filter_t = GetRandomFilterTensor(dims); + Tensor out_backprop_t = GetRandomOutputTensor(dims); // assuming SAME padding + + Node* input_sizes = + test::graph::Constant(graph, input_sizes_t, "input_sizes"); + Node* filter = test::graph::Constant(graph, filter_t, "filter"); + Node* out_backprop = + test::graph::Constant(graph, out_backprop_t, "out_backprop"); + + Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl"); + + Node* conv2d_bwd_input; + TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d_bwd_input"), + "_MklConv2DBackpropInput") + .Input(input_sizes) + .Input(filter) + .Input(out_backprop) + .Input(not_mkl_shape) + .Input(not_mkl_shape) + .Input(not_mkl_shape) + .Attr("T", DT_FLOAT) + .Attr("strides", {1, 1, 1, 1}) + .Attr("padding", "SAME") + .Attr("_kernel", "MklOp") + .Finalize(graph, &conv2d_bwd_input)); + + return graph; +} +#endif + +static Graph* DefaultConv2DBwdFilter(const Conv2DDimensions& dims) { + auto* graph = new Graph(OpRegistry::Global()); + + Tensor 
input_t = GetRandomInputTensor(dims); + Tensor filter_sizes_t = GetFilterSizesTensor(dims); + Tensor filter_t = GetRandomFilterTensor(dims); + Tensor out_backprop_t = GetRandomOutputTensor(dims); // assuming SAME padding + + Node* input = test::graph::Constant(graph, input_t, "input"); + Node* filter_sizes = + test::graph::Constant(graph, filter_sizes_t, "filter_sizes"); + Node* out_backprop = + test::graph::Constant(graph, out_backprop_t, "out_backprop"); + + Node* conv2d_bwd_filter; + TF_CHECK_OK( + NodeBuilder(graph->NewName("conv_2d_bwd_filter"), "Conv2DBackpropFilter") + .Input(input) + .Input(filter_sizes) + .Input(out_backprop) + .Attr("T", DT_FLOAT) + .Attr("strides", {1, 1, 1, 1}) + .Attr("padding", "SAME") + .Finalize(graph, &conv2d_bwd_filter)); + + return graph; +} + +#if defined(INTEL_MKL_DNN_ONLY) +static Graph* MklConv2DBwdFilter(const Conv2DDimensions& dims) { + Graph* graph = new Graph(OpRegistry::Global()); + + Tensor input_t = GetRandomInputTensor(dims); + Tensor filter_sizes_t = GetFilterSizesTensor(dims); + Tensor filter_t = GetRandomFilterTensor(dims); + Tensor out_backprop_t = GetRandomOutputTensor(dims); // assuming SAME padding + + Node* input = test::graph::Constant(graph, input_t, "input"); + Node* filter_sizes = + test::graph::Constant(graph, filter_sizes_t, "filter_sizes"); + Node* out_backprop = + test::graph::Constant(graph, out_backprop_t, "out_backprop"); + + Node* not_mkl_shape = test::graph::Constant(graph, NonMklTensor(), "not_mkl"); + + Node* conv2d_bwd_filter; + TF_CHECK_OK(NodeBuilder(graph->NewName("conv_2d_bwd_filter"), + "_MklConv2DBackpropFilter") + .Input(input) + .Input(filter_sizes) + .Input(out_backprop) + .Input(not_mkl_shape) + .Input(not_mkl_shape) + .Input(not_mkl_shape) + .Attr("T", DT_FLOAT) + .Attr("strides", {1, 1, 1, 1}) + .Attr("padding", "SAME") + .Attr("_kernel", "MklOp") + .Finalize(graph, &conv2d_bwd_filter)); + + return graph; +} +#endif + +// Macro arguments names: 
--------------------------------------------------- // +// N: batch size +// H: height +// W: width +// C: channels +// FC: filter count +// FH: filter height +// FW: filter width + +#define BM_CONCAT(a, b) a##b + +#define BM_NAME(p, type, N, H, W, C, FC, FH, FW) \ + BM_CONCAT(BM_##p##_##type##_in_##N##_##H##_##W##_##C, _f_##FC##_##FH##_##FW) + +// Flops computation in these benchmarks are the same as in +// eigen_benchmark_cpu_test.cc. + +#define BM_Conv2DT(kind, N, H, W, C, FC, FH, FW, type, LABEL) \ + static void BM_NAME(Conv2D_##kind, type, N, H, W, C, FC, FH, \ + FW)(int iters) { \ + testing::SetLabel(LABEL); \ + \ + int64 num_computed_elements = (N) * (H) * (W) * (FC); \ + int64 flops_per_iter = num_computed_elements * ((C) * (FH) * (FW)); \ + testing::ItemsProcessed(static_cast(iters) * flops_per_iter); \ + \ + Conv2DDimensions dims(N, H, W, C, FC, FW, FH); \ + test::Benchmark(#type, BM_CONCAT(kind, Conv2D)(dims)).Run(iters); \ + } \ + BENCHMARK(BM_NAME(Conv2D_##kind, type, N, H, W, C, FC, FH, FW)) + +#if defined(INTEL_MKL_DNN_ONLY) +#define BM_Conv2D(N, H, W, C, FC, FH, FW, type, LABEL) \ + BM_Conv2DT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \ + BM_Conv2DT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL); +#else +#define BM_Conv2D(N, H, W, C, FC, FH, FW, type, LABEL) \ + BM_Conv2DT(Default, N, H, W, C, FC, FH, FW, type, LABEL); +#endif + +#define BM_Conv2DBwdInputT(kind, N, H, W, C, FC, FH, FW, type, LABEL) \ + static void BM_NAME(Conv2DBwdInput_##kind, type, N, H, W, C, FC, FH, \ + FW)(int iters) { \ + testing::SetLabel(LABEL); \ + \ + int64 num_computed_elements = (N) * (H) * (W) * (C); \ + int64 flops_per_iter = num_computed_elements * ((C) * (FH) * (FW)); \ + testing::ItemsProcessed(static_cast(iters) * flops_per_iter); \ + \ + Conv2DDimensions dims(N, H, W, C, FC, FW, FH); \ + test::Benchmark(#type, BM_CONCAT(kind, Conv2DBwdInput)(dims)).Run(iters); \ + } \ + BENCHMARK(BM_NAME(Conv2DBwdInput_##kind, type, N, H, W, C, FC, FH, FW)) + +#if 
defined(INTEL_MKL_DNN_ONLY) +#define BM_Conv2DBwdInput(N, H, W, C, FC, FH, FW, type, LABEL) \ + BM_Conv2DBwdInputT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \ + BM_Conv2DBwdInputT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL); +#else +#define BM_Conv2DBwdInput(N, H, W, C, FC, FH, FW, type, LABEL) \ + BM_Conv2DBwdInputT(Default, N, H, W, C, FC, FH, FW, type, LABEL); +#endif + +#define BM_Conv2DBwdFilterT(kind, N, H, W, C, FC, FH, FW, type, LABEL) \ + static void BM_NAME(Conv2DBwdFilter_##kind, type, N, H, W, C, FC, FH, \ + FW)(int iters) { \ + testing::SetLabel(LABEL); \ + \ + int64 num_computed_elements = (FH) * (FW) * (C) * (FC); \ + int64 flops_per_iter = num_computed_elements * ((N) * (H) * (W)); \ + testing::ItemsProcessed(static_cast(iters) * flops_per_iter); \ + \ + Conv2DDimensions dims(N, H, W, C, FC, FW, FH); \ + test::Benchmark(#type, BM_CONCAT(kind, Conv2DBwdFilter)(dims)).Run(iters); \ + } \ + BENCHMARK(BM_NAME(Conv2DBwdFilter_##kind, type, N, H, W, C, FC, FH, FW)) + +#if defined(INTEL_MKL_DNN_ONLY) +#define BM_Conv2DBwdFilter(N, H, W, C, FC, FH, FW, type, LABEL) \ + BM_Conv2DBwdFilterT(Default, N, H, W, C, FC, FH, FW, type, LABEL); \ + BM_Conv2DBwdFilterT(Mkl, N, H, W, C, FC, FH, FW, type, LABEL); +#else +#define BM_Conv2DBwdFilter(N, H, W, C, FC, FH, FW, type, LABEL) \ + BM_Conv2DBwdFilterT(Default, N, H, W, C, FC, FH, FW, type, LABEL); +#endif + +// ImageNet Convolutions ---------------------------------------------------- // + +BM_Conv2D(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3"); +BM_Conv2D(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5"); +BM_Conv2D(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3"); +BM_Conv2D(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5"); +BM_Conv2D(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3"); +BM_Conv2D(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5"); +BM_Conv2D(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3"); + +BM_Conv2DBwdInput(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3"); +BM_Conv2DBwdInput(32, 28, 28, 16, 
32, 5, 5, cpu, "conv3a_00_5x5"); +BM_Conv2DBwdInput(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3"); +BM_Conv2DBwdInput(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5"); +BM_Conv2DBwdInput(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3"); +BM_Conv2DBwdInput(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5"); +BM_Conv2DBwdInput(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3"); + +BM_Conv2DBwdFilter(32, 28, 28, 96, 128, 3, 3, cpu, "conv3a_00_3x3"); +BM_Conv2DBwdFilter(32, 28, 28, 16, 32, 5, 5, cpu, "conv3a_00_5x5"); +BM_Conv2DBwdFilter(32, 28, 28, 128, 192, 3, 3, cpu, "conv3_00_3x3"); +BM_Conv2DBwdFilter(32, 28, 28, 32, 96, 5, 5, cpu, "conv3_00_5x5"); +BM_Conv2DBwdFilter(32, 14, 14, 96, 204, 3, 3, cpu, "conv4a_00_3x3"); +BM_Conv2DBwdFilter(32, 14, 14, 16, 48, 5, 5, cpu, "conv4a_00_5x5"); +BM_Conv2DBwdFilter(32, 14, 14, 112, 224, 3, 3, cpu, "conv4b_00_3x3"); + +} // namespace tensorflow diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 16f7b217b4..689679c838 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1216,9 +1216,11 @@ def tf_mkl_kernel_library( if prefix: srcs = srcs + native.glob( [prefix + "*.cc"], + exclude = [prefix + "*test*"], ) hdrs = hdrs + native.glob( [prefix + "*.h"], + exclude = [prefix + "*test*"], ) # -fno-exceptions in nocopts breaks compilation if header modules are enabled. -- GitLab From 77a1883c9dde50efdf9505528adf636ed991e431 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 15:11:35 -0700 Subject: [PATCH 0288/1357] Fix _check_is_tensor like _check_is_tensor_or_operation was fixed in #22264. 
PiperOrigin-RevId: 213346485 --- tensorflow/python/estimator/model_fn.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py index 331a9d1a05..0f26a5bba4 100644 --- a/tensorflow/python/estimator/model_fn.py +++ b/tensorflow/python/estimator/model_fn.py @@ -26,7 +26,6 @@ import six from tensorflow.python.estimator.export import export_output as export_output_lib from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_util from tensorflow.python.keras.metrics import Metric from tensorflow.python.ops import array_ops from tensorflow.python.saved_model import signature_constants @@ -467,13 +466,13 @@ class _TPUEstimatorSpec( def _check_is_tensor_or_operation(x, name): - if not (isinstance(x, ops.Operation) or tensor_util.is_tensor(x)): + if not (isinstance(x, ops.Operation) or ops.is_dense_tensor_like(x)): raise TypeError('{} must be Operation or Tensor, given: {}'.format(name, x)) def _check_is_tensor(x, tensor_name): """Returns `x` if it is a `Tensor`, raises TypeError otherwise.""" - if not isinstance(x, ops.Tensor): + if not ops.is_dense_tensor_like(x): raise TypeError('{} must be Tensor, given: {}'.format(tensor_name, x)) return x -- GitLab From 3ec29c57b728f5f3b8f80e84f3189f70f86536ea Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 17 Sep 2018 15:27:59 -0700 Subject: [PATCH 0289/1357] Add api_docs_relpath option. Eliminate error when copying a file to itself. 
PiperOrigin-RevId: 213349424 --- tensorflow/tools/docs/BUILD | 3 ++- tensorflow/tools/docs/generate_lib.py | 14 ++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index 4f7efe193f..b218e900bf 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -91,9 +91,10 @@ py_binary( ":parser", ":pretty_docs", ":py_guide_parser", - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", + "//tensorflow/python:util", "//tensorflow/tools/common:public_api", "//tensorflow/tools/common:traverse", + "@six_archive//:six", ], ) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 1cd9cb7ca9..77a3ca2052 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -453,7 +453,11 @@ def update_id_tags_inplace(src_dir): EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt']) -def replace_refs(src_dir, output_dir, reference_resolver, file_pattern='*.md'): +def replace_refs(src_dir, + output_dir, + reference_resolver, + file_pattern='*.md', + api_docs_relpath='api_docs'): """Fix @{} references in all files under `src_dir` matching `file_pattern`. A matching directory structure, with the modified files is @@ -472,12 +476,13 @@ def replace_refs(src_dir, output_dir, reference_resolver, file_pattern='*.md'): reference_resolver: A `parser.ReferenceResolver` to make the replacements. file_pattern: Only replace references in files matching file_patters, using fnmatch. Non-matching files are copied unchanged. + api_docs_relpath: Relative-path string to the api_docs, from the src_dir. """ # Iterate through all the source files and process them. 
for dirpath, _, filenames in os.walk(src_dir): + depth = os.path.relpath(src_dir, start=dirpath) # How to get from `dirpath` to api_docs/python/ - relative_path_to_root = os.path.relpath( - path=os.path.join(src_dir, 'api_docs/python'), start=dirpath) + relative_path_to_root = os.path.join(depth, api_docs_relpath, 'python') # Make the directory under output_dir. new_dir = os.path.join(output_dir, @@ -497,7 +502,8 @@ def replace_refs(src_dir, output_dir, reference_resolver, file_pattern='*.md'): full_out_path = os.path.join(output_dir, suffix) # Copy files that do not match the file_pattern, unmodified. if not fnmatch.fnmatch(base_name, file_pattern): - shutil.copyfile(full_in_path, full_out_path) + if full_in_path != full_out_path: + shutil.copyfile(full_in_path, full_out_path) continue with open(full_in_path, 'rb') as f: -- GitLab From aec9a7077001e8eacb278839f2e56c228afdc4a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 15:30:35 -0700 Subject: [PATCH 0290/1357] Move OvicBenchmarker class from app folder to source folder. 
PiperOrigin-RevId: 213349833 --- tensorflow/contrib/lite/java/ovic/BUILD | 1 + tensorflow/contrib/lite/java/ovic/demo/app/BUILD | 1 - .../lite/java/ovic/demo/app/OvicBenchmarkerActivity.java | 2 ++ .../main/java/org/tensorflow/ovic}/OvicBenchmarker.java | 4 +--- 4 files changed, 4 insertions(+), 4 deletions(-) rename tensorflow/contrib/lite/java/ovic/{demo/app => src/main/java/org/tensorflow/ovic}/OvicBenchmarker.java (98%) diff --git a/tensorflow/contrib/lite/java/ovic/BUILD b/tensorflow/contrib/lite/java/ovic/BUILD index 781289ceb2..bb0be04ca2 100644 --- a/tensorflow/contrib/lite/java/ovic/BUILD +++ b/tensorflow/contrib/lite/java/ovic/BUILD @@ -44,6 +44,7 @@ java_binary( android_library( name = "ovicbenchmarkerlib", srcs = [ + "src/main/java/org/tensorflow/ovic/OvicBenchmarker.java", "src/main/java/org/tensorflow/ovic/OvicClassifier.java", "src/main/java/org/tensorflow/ovic/OvicSingleImageResult.java", ], diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD index a8d751ade2..b2e3a9bd7d 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/BUILD +++ b/tensorflow/contrib/lite/java/ovic/demo/app/BUILD @@ -6,7 +6,6 @@ licenses(["notice"]) # Apache 2.0 android_binary( name = "ovic_benchmarker_binary", srcs = [ - "OvicBenchmarker.java", "OvicBenchmarkerActivity.java", ], assets = [ diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java index 59457c308a..4adf94aeb6 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java +++ b/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarkerActivity.java @@ -34,8 +34,10 @@ import java.io.InputStream; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.text.DecimalFormat; +import org.tensorflow.ovic.OvicBenchmarker; import org.tensorflow.ovic.OvicSingleImageResult; + /** Class that benchmark image 
classifier models. */ public class OvicBenchmarkerActivity extends Activity { /** Tag for the {@link Log}. */ diff --git a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java similarity index 98% rename from tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java rename to tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java index 113ab74a20..4cda258bee 100644 --- a/tensorflow/contrib/lite/java/ovic/demo/app/OvicBenchmarker.java +++ b/tensorflow/contrib/lite/java/ovic/src/main/java/org/tensorflow/ovic/OvicBenchmarker.java @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -package ovic.demo.app; +package org.tensorflow.ovic; import android.graphics.Bitmap; import android.os.SystemClock; @@ -22,8 +22,6 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.MappedByteBuffer; -import org.tensorflow.ovic.OvicClassifier; -import org.tensorflow.ovic.OvicSingleImageResult; /** * Class that benchmarks image classifier models. -- GitLab From 3365cd1cc7bf3dcb781c76652132119bf82133e6 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 17 Sep 2018 15:32:12 -0700 Subject: [PATCH 0291/1357] Add generic fallback optimized implementations for dilated DepthwiseConv. 
PiperOrigin-RevId: 213350122 --- tensorflow/contrib/lite/kernels/BUILD | 1 + .../contrib/lite/kernels/depthwise_conv.cc | 24 +-- .../lite/kernels/depthwise_conv_test.cc | 162 ++++++++++++++++-- .../internal/depthwiseconv_float_test.cc | 75 ++++---- .../internal/depthwiseconv_quantized_test.cc | 15 +- .../internal/optimized/depthwiseconv_float.h | 52 +++--- .../internal/optimized/depthwiseconv_uint8.h | 68 +++++--- .../depthwiseconv_uint8_3x3_filter.h | 6 +- .../lite/kernels/internal/test_util.cc | 20 ++- .../contrib/lite/kernels/internal/test_util.h | 3 +- 10 files changed, 281 insertions(+), 145 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index f52d29ea76..daaf6714cc 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -509,6 +509,7 @@ tf_cc_test( ":builtin_ops", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_absl//absl/memory", "@com_google_googletest//:gtest", ], ) diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index 3e1ce60113..798ee849ec 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -184,17 +184,7 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, const Dims<4>&, const float*, const Dims<4>&, int, int, int, int, int, int, int, float, float, float*, const Dims<4>&); - KernelType effective_kernel_type; - // TODO(suharshs): Currently only the reference implementation supports - // dilations. 
- if ((params->dilation_width_factor != 1) || - (params->dilation_height_factor != 1)) { - effective_kernel_type = kReference; - } else { - effective_kernel_type = kernel_type; - } - - if (effective_kernel_type == kReference) { + if (kernel_type == kReference) { depthwise_conv = &reference_ops::DepthwiseConv; } else { depthwise_conv = &optimized_ops::DepthwiseConv; @@ -224,17 +214,7 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, int, int, int, int, int, int, int, int32, int32, int, int32, int32, uint8*, const Dims<4>&); - KernelType effective_kernel_type; - // TODO(suharshs): Currently only the reference implementation supports - // dilations. - if ((params->dilation_width_factor != 1) || - (params->dilation_height_factor != 1)) { - effective_kernel_type = kReference; - } else { - effective_kernel_type = kernel_type; - } - - if (effective_kernel_type == kReference) { + if (kernel_type == kReference) { depthwise_conv = &reference_ops::DepthwiseConv; } else { depthwise_conv = &optimized_ops::DepthwiseConv; diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc index 2af26ab80a..4a33a0319d 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv_test.cc @@ -14,12 +14,24 @@ limitations under the License. 
==============================================================================*/ #include #include +#include "absl/memory/memory.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" #include "tensorflow/contrib/lite/model.h" namespace tflite { + +namespace ops { +namespace builtin { + +TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF(); +TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT(); +TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT(); + +} // namespace builtin +} // namespace ops + namespace { using ::testing::ElementsAreArray; @@ -28,9 +40,11 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel { public: // TODO(ahentz): Also test different activation types, bias, padding types, // stride values. - BaseDepthwiseConvolutionOpModel(const TensorData& input, + BaseDepthwiseConvolutionOpModel(TfLiteRegistration* registration, + const TensorData& input, const TensorData& filter, const TensorData& output, + Padding padding_type, int dilation_factor = 1) { input_ = AddInput(input); filter_ = AddInput(filter); @@ -56,11 +70,14 @@ class BaseDepthwiseConvolutionOpModel : public SingleOpModel { SetBuiltinOp( BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions, - CreateDepthwiseConv2DOptions(builder_, Padding_VALID, 1, 1, depth_mul, + CreateDepthwiseConv2DOptions(builder_, padding_type, 1, 1, depth_mul, ActivationFunctionType_NONE, dilation_factor, dilation_factor) .Union()); + resolver_ = absl::make_unique( + BuiltinOperator_DEPTHWISE_CONV_2D, registration); + BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)}); } @@ -86,10 +103,25 @@ class DepthwiseConvolutionOpModel : public BaseDepthwiseConvolutionOpModel { std::vector GetOutput() { return ExtractVector(output_); } }; -TEST(DepthwiseConvolutionOpTest, SimpleTest) { - DepthwiseConvolutionOpModel m({TensorType_FLOAT32, {1, 3, 2, 
2}}, +const auto kKernelMap = new std::map({ + {"Reference", ops::builtin::Register_DEPTHWISE_CONVOLUTION_REF()}, + {"GenericOptimized", + ops::builtin::Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT()}, + {"NeonOptimized", ops::builtin::Register_DEPTHWISE_CONVOLUTION_NEON_OPT()}, +}); + +class DepthwiseConvolutionOpTest : public SingleOpTest { + protected: + const std::map& GetKernelMap() override { + return *kKernelMap; + } +}; + +TEST_P(DepthwiseConvolutionOpTest, SimpleTest) { + DepthwiseConvolutionOpModel m(GetRegistration(), + {TensorType_FLOAT32, {1, 3, 2, 2}}, {TensorType_FLOAT32, {1, 2, 2, 4}}, - {TensorType_FLOAT32, {}}); + {TensorType_FLOAT32, {}}, Padding_VALID); m.SetInput({ 1, 2, 7, 8, // column 1 @@ -112,7 +144,7 @@ TEST(DepthwiseConvolutionOpTest, SimpleTest) { })); } -TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) { +TEST_P(DepthwiseConvolutionOpTest, SimpleDilatedTestPaddingValid) { const int depth = 1; const int image_width = 9; const int image_height = 9; @@ -121,10 +153,11 @@ TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) { const int filter_count = 1; const int dilation_factor = 3; DepthwiseConvolutionOpModel m( + GetRegistration(), {TensorType_FLOAT32, {image_batch_count, image_height, image_width, depth}}, {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}}, - {TensorType_FLOAT32, {}}, dilation_factor); + {TensorType_FLOAT32, {}}, Padding_VALID, dilation_factor); // The image matrix is: // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -164,6 +197,41 @@ TEST(DepthwiseConvolutionOpTest, SimpleDilatedTest) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5})); } +TEST_P(DepthwiseConvolutionOpTest, SimpleDilatedTestPaddingSame) { + const int depth = 1; + const int image_width = 3; + const int image_height = 3; + const int image_batch_count = 1; + const int filter_size = 2; + const int filter_count = 1; + const int dilation_factor = 2; + DepthwiseConvolutionOpModel m( + GetRegistration(), + 
{TensorType_FLOAT32, + {image_batch_count, image_height, image_width, depth}}, + {TensorType_FLOAT32, {depth, filter_size, filter_size, filter_count}}, + {TensorType_FLOAT32, {}}, Padding_SAME, dilation_factor); + + // The image matrix is: + // | 1 | 1 | 1 | + // | 1 | 1 | 1 | + // | 1 | 1 | 1 | + m.SetInput({1, 1, 1, 1, 1, 1, 1, 1, 1}); + // The filter matrix is: + // | 1 | 2 | + // | 3 | 4 | + m.SetFilter({1, 2, 3, 4}); + // No bias for this test. + m.SetBias({0}); + m.Invoke(); + + // Output: + // | 4 | 7 | 3 | + // | 6 |10 | 4 | + // | 2 | 3 | 1 | + EXPECT_THAT(m.GetOutput(), ElementsAreArray({4, 7, 3, 6, 10, 4, 2, 3, 1})); +} + class QuantizedDepthwiseConvolutionOpModel : public BaseDepthwiseConvolutionOpModel { public: @@ -188,13 +256,20 @@ class QuantizedDepthwiseConvolutionOpModel } }; +class QuantizedDepthwiseConvolutionOpTest : public SingleOpTest { + protected: + const std::map& GetKernelMap() override { + return *kKernelMap; + } +}; + // In this test we set the input and output scales so that the results match // exactly the 'non-quantized' version. 
-TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) { +TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) { QuantizedDepthwiseConvolutionOpModel m( - {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64}, + GetRegistration(), {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64}, {TensorType_UINT8, {1, 2, 2, 4}, -63.5, 64}, - {TensorType_UINT8, {}, -127, 128}); + {TensorType_UINT8, {}, -127, 128}, Padding_VALID); m.SetInput({ 1, 2, 7, 8, // column 1 @@ -224,15 +299,16 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleTestQuantized) { })); } -TEST(QuantizedDepthwiseConvolutionOpTest, - SimpleTestQuantizedFilterMultiplierGreaterThan1) { +TEST_P(QuantizedDepthwiseConvolutionOpTest, + SimpleTestQuantizedFilterMultiplierGreaterThan1) { QuantizedDepthwiseConvolutionOpModel quant_op( - {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64}, + GetRegistration(), {TensorType_UINT8, {1, 3, 2, 2}, -63.5, 64}, {TensorType_UINT8, {1, 2, 2, 4}, -128.5, 128}, - {TensorType_UINT8, {}, -127, 128}); - DepthwiseConvolutionOpModel float_op({TensorType_FLOAT32, {1, 3, 2, 2}}, + {TensorType_UINT8, {}, -127, 128}, Padding_VALID); + DepthwiseConvolutionOpModel float_op(GetRegistration(), + {TensorType_FLOAT32, {1, 3, 2, 2}}, {TensorType_FLOAT32, {1, 2, 2, 4}}, - {TensorType_FLOAT32, {}}); + {TensorType_FLOAT32, {}}, Padding_VALID); std::initializer_list input = { 1, 2, 7, 8, // column 1 @@ -261,7 +337,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, ElementsAreArray(ArrayFloatNear(float_op.GetOutput(), 1))); } -TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) { +TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTestPaddingValid) { const int depth = 1; const int image_width = 9; const int image_height = 9; @@ -270,6 +346,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) { const int filter_count = 1; const int dilation_factor = 3; QuantizedDepthwiseConvolutionOpModel m( + GetRegistration(), {TensorType_UINT8, {image_batch_count, image_height, image_width, 
depth}, 0, @@ -278,7 +355,7 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) { {depth, filter_size, filter_size, filter_count}, 0, 255}, - {TensorType_UINT8, {}, 0, 255}, dilation_factor); + {TensorType_UINT8, {}, 0, 255}, Padding_VALID, dilation_factor); // The image matrix is: // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | @@ -319,6 +396,55 @@ TEST(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTest) { ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5})); } +TEST_P(QuantizedDepthwiseConvolutionOpTest, SimpleDilatedTestPaddingSame) { + const int depth = 1; + const int image_width = 3; + const int image_height = 3; + const int image_batch_count = 1; + const int filter_size = 2; + const int filter_count = 1; + const int dilation_factor = 2; + QuantizedDepthwiseConvolutionOpModel m( + GetRegistration(), + {TensorType_UINT8, + {image_batch_count, image_height, image_width, depth}, + 0, + 255}, + {TensorType_UINT8, + {depth, filter_size, filter_size, filter_count}, + 0, + 255}, + {TensorType_UINT8, {}, 0, 255}, Padding_SAME, dilation_factor); + + // The image matrix is: + // | 1 | 1 | 1 | + // | 1 | 1 | 1 | + // | 1 | 1 | 1 | + m.SetInput({1, 1, 1, 1, 1, 1, 1, 1, 1}); + // The filter matrix is: + // | 1 | 2 | + // | 3 | 4 | + m.SetFilter({1, 2, 3, 4}); + // No bias for this test. 
+ m.SetBias({0}); + m.Invoke(); + + // Output: + // | 4 | 7 | 3 | + // | 6 |10 | 4 | + // | 2 | 3 | 1 | + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray({4, 7, 3, 6, 10, 4, 2, 3, 1})); +} + +INSTANTIATE_TEST_CASE_P( + DepthwiseConvolutionOpTest, DepthwiseConvolutionOpTest, + ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap))); + +INSTANTIATE_TEST_CASE_P( + QuantizedDepthwiseConvolutionOpTest, QuantizedDepthwiseConvolutionOpTest, + ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap))); + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc index 844ee6a53d..7600b26f5c 100644 --- a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include "tensorflow/contrib/lite/kernels/internal/common.h" #include "tensorflow/contrib/lite/kernels/internal/test_util.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" @@ -28,23 +29,29 @@ namespace tflite { namespace { // Runs the DepthwiseConv and compares against the reference implementation. 
-template void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, const float* bias_data, const Dims<4>& bias_dims, - int stride, int pad_width, int pad_height, - int depth_multiplier, const Dims<4>& output_dims) { + int stride, int dilation_width_factor, + int dilation_height_factor, int pad_width, + int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, + const Dims<4>& output_dims) { const int output_buffer_size = RequiredBufferSizeForDims(output_dims); std::vector output_data(output_buffer_size); std::vector reference_output_data(output_buffer_size); - reference_ops::DepthwiseConv(input_data, input_dims, filter_data, - filter_dims, bias_data, bias_dims, stride, - pad_width, pad_height, depth_multiplier, - reference_output_data.data(), output_dims); - optimized_ops::DepthwiseConv(input_data, input_dims, filter_data, - filter_dims, bias_data, bias_dims, stride, - pad_width, pad_height, depth_multiplier, - output_data.data(), output_dims); + reference_ops::DepthwiseConv( + input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride, stride, dilation_width_factor, dilation_height_factor, pad_width, + pad_height, depth_multiplier, output_activation_min, + output_activation_max, reference_output_data.data(), output_dims); + optimized_ops::DepthwiseConv( + input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride, stride, dilation_width_factor, dilation_height_factor, pad_width, + pad_height, depth_multiplier, output_activation_min, + output_activation_max, output_data.data(), output_dims); + double sum_abs_diff = 0; float max_abs_val = 0; for (int i = 0; i < output_buffer_size; i++) { @@ -59,27 +66,6 @@ void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims, } } -void TestOneDepthwiseConv(FusedActivationFunctionType Ac, - const float* input_data, const Dims<4>& input_dims, - const 
float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride, int pad_width, int pad_height, - int depth_multiplier, const Dims<4>& output_dims) { -#define TOCO_HANDLE_CASE(AC_TYPE) \ - if (AC_TYPE == Ac) { \ - TestOneDepthwiseConv(input_data, input_dims, filter_data, \ - filter_dims, bias_data, bias_dims, stride, \ - pad_width, pad_height, depth_multiplier, \ - output_dims); \ - return; \ - } - TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone) - TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu) - TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1) - TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6) -#undef TOCO_HANDLE_CASE -} - // This function picks some random DepthwiseConv params, which may or may not // be legal. If they're not legal, it returns false. If they're legal, // it runs the DepthwiseConv test and returns true. This allows the caller @@ -99,6 +85,16 @@ bool TryTestOneDepthwiseConv() { const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50); const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); const int output_depth = input_depth * depth_multiplier; + const int dilation_width_factor = RandomElement(std::vector({1, 2, 4})); + const int dilation_height_factor = RandomElement(std::vector({1, 2, 4})); + float output_activation_min, output_activation_max; + FusedActivationFunctionType ac = + RandomElement(std::vector( + {FusedActivationFunctionType::kNone, + FusedActivationFunctionType::kRelu, + FusedActivationFunctionType::kRelu1, + FusedActivationFunctionType::kRelu6})); + GetActivationMinMax(ac, &output_activation_min, &output_activation_max); // The optimized DepthwiseConv implementation currently uses a fixed-size // accumulator buffer on the stack, with that size. This currently means // that it does not support larger output depths. 
It CHECK's for it, @@ -109,10 +105,6 @@ bool TryTestOneDepthwiseConv() { if (output_depth > kMaxSupportedOutputDepth) { return false; } - const auto ac = RandomElement(std::vector( - {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu, - FusedActivationFunctionType::kRelu6, - FusedActivationFunctionType::kRelu1})); Dims<4> input_dims_inference = MakeDimsForInference(input_depth, input_width, input_height, batch); Dims<4> output_dims_inference; @@ -120,7 +112,8 @@ bool TryTestOneDepthwiseConv() { const auto padding_type = UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width, - filter_height, stride, padding_type, + filter_height, stride, dilation_width_factor, + dilation_height_factor, padding_type, &output_dims_inference, &pad_width, &pad_height)) { return false; } @@ -140,10 +133,12 @@ bool TryTestOneDepthwiseConv() { FillRandom(&input_data, -input_amplitude, input_amplitude); FillRandom(&filter_data, -filter_amplitude, filter_amplitude); FillRandom(&bias_data, -bias_amplitude, bias_amplitude); - TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference, + TestOneDepthwiseConv(input_data.data(), input_dims_inference, filter_data.data(), filter_dims_inference, - bias_data.data(), bias_dims_inference, stride, pad_width, - pad_height, depth_multiplier, output_dims_inference); + bias_data.data(), bias_dims_inference, stride, + dilation_width_factor, dilation_height_factor, pad_width, + pad_height, depth_multiplier, output_activation_min, + output_activation_max, output_dims_inference); return true; } diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc index 2c0fc8433e..312d048b2d 100644 --- a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc @@ -199,6 
+199,7 @@ void TestOneDepthwiseConv( bool TryTestDepthwiseConv(int batch, int input_depth, int input_width, int input_height, int filter_width, int filter_height, int depth_multiplier, int stride, + int dilation_width_factor, int dilation_height_factor, PaddingType padding_type) { const int output_depth = input_depth * depth_multiplier; // The optimized DepthwiseConv implementation currently uses a fixed-size @@ -231,7 +232,8 @@ bool TryTestDepthwiseConv(int batch, int input_depth, int input_width, Dims<4> output_dims_inference; int pad_width, pad_height; if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width, - filter_height, stride, padding_type, + filter_height, stride, dilation_width_factor, + dilation_height_factor, padding_type, &output_dims_inference, &pad_width, &pad_height)) { return false; } @@ -274,12 +276,15 @@ bool TryTestOneDepthwiseConv() { const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10); const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50); const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const int dilation_width_factor = RandomElement(std::vector({1, 2, 4})); + const int dilation_height_factor = RandomElement(std::vector({1, 2, 4})); const auto padding_type = UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; return TryTestDepthwiseConv(batch, input_depth, input_width, input_height, filter_width, filter_height, depth_multiplier, - stride, padding_type); + stride, dilation_width_factor, + dilation_height_factor, padding_type); } // Tests parameters for the 3x3 filter kernel. @@ -292,6 +297,9 @@ bool TryTestOneDepthwiseConv3x3Filter() { const int filter_height = 3; const int depth_multiplier = 1; const int stride = UniformRandomInt(1, 2); + // We don't support dilations in the 3x3 filter. 
+ const int dilation_width_factor = 1; + const int dilation_height_factor = 1; // Although the kernel supports only kValid padding, we test that kSame // is using the correct code path. const auto padding_type = @@ -299,7 +307,8 @@ bool TryTestOneDepthwiseConv3x3Filter() { return TryTestDepthwiseConv(batch, input_depth, input_width, input_height, filter_width, filter_height, depth_multiplier, - stride, padding_type); + stride, dilation_width_factor, + dilation_height_factor, padding_type); } void TestOneDepthwiseConv() { diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index f2d1319801..f0bea7fa1d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -761,7 +761,8 @@ struct FloatDepthwiseConvKernel { // Accumulates the effect of one row of the filter, on a segment of one row // of the output, accessing the corresponding one row of the input. template -void FloatDepthwiseConvAccumRow(int stride, int input_depth, int input_width, +void FloatDepthwiseConvAccumRow(int stride, int dilation_factor, + int input_depth, int input_width, const float* input_data, int pad_width, int depth_multiplier, int filter_width, const float* filter_data, @@ -835,10 +836,10 @@ void FloatDepthwiseConvAccumRow(int stride, int input_depth, int input_width, // generic fallback of FloatDepthwiseConvAccumRow, portable, non-templatized. 
inline void FloatDepthwiseConvAccumRowGeneric( - int stride, int input_depth, int input_width, const float* input_data, - int pad_width, int depth_multiplier, int filter_width, - const float* filter_data, int out_x_buffer_start, int out_x_buffer_end, - int output_depth, float* acc_buffer) { + int stride, int dilation_factor, int input_depth, int input_width, + const float* input_data, int pad_width, int depth_multiplier, + int filter_width, const float* filter_data, int out_x_buffer_start, + int out_x_buffer_end, int output_depth, float* acc_buffer) { gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); #ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK @@ -860,6 +861,7 @@ inline void FloatDepthwiseConvAccumRowGeneric( << "* stride = " << stride << "\n" << "* input_depth = " << input_depth << "\n" << "* depth_multiplier = " << depth_multiplier << "\n" + << "* dilation_factor = " << dilation_factor << "\n" << "*\n" << "* Please do not hesitate to contact benoitjacob@ with this\n" << "* information.\n" @@ -869,14 +871,17 @@ inline void FloatDepthwiseConvAccumRowGeneric( const float* filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int out_x_loop_start = std::max( - out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride); - const int out_x_loop_end = - std::min(out_x_buffer_end, - (pad_width + input_width - filter_x + stride - 1) / stride); + out_x_buffer_start, + (pad_width - dilation_factor * filter_x + stride - 1) / stride); + const int out_x_loop_end = std::min( + out_x_buffer_end, + (pad_width + input_width - dilation_factor * filter_x + stride - 1) / + stride); float* acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; - const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x; + const int in_x_origin = + (out_x_loop_start * stride) - pad_width + dilation_factor * 
filter_x; const float* input_ptr = input_data + in_x_origin * input_depth; const int input_ptr_increment = (stride - 1) * input_depth; for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) { @@ -921,14 +926,14 @@ inline void DepthwiseConv( const int depth_multiplier = params.depth_multiplier; const float output_activation_min = params.float_activation_min; const float output_activation_max = params.float_activation_max; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - // TODO(suharshs): Optimized implementation of dilation depthwise conv need to - // be implemented. - TFLITE_DCHECK_EQ(params.dilation_width_factor, 1); - TFLITE_DCHECK_EQ(params.dilation_height_factor, 1); + const bool has_dilation = (params.dilation_width_factor != 1) || + (params.dilation_height_factor != 1); const int batches = MatchingDim(input_shape, 0, output_shape, 0); const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); @@ -961,7 +966,7 @@ inline void DepthwiseConv( FIXED_DEPTH_MULTIPLIER) \ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ - depth_multiplier == FIXED_DEPTH_MULTIPLIER) { \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) { \ row_accum_func = \ FloatDepthwiseConvAccumRow; \ @@ -1014,9 +1019,13 @@ inline void DepthwiseConv( for (int b = 0; b < batches; ++b) { for (int out_y = 0; out_y < output_height; ++out_y) { const int in_y_origin = (out_y * stride_height) - pad_height; - const int filter_y_start = std::max(0, -in_y_origin); + const int filter_y_start = + std::max(0, (-in_y_origin + dilation_height_factor - 1) / + dilation_height_factor); const int filter_y_end = - std::min(filter_height, input_height - 
in_y_origin); + std::min(filter_height, + (input_height - in_y_origin + dilation_height_factor - 1) / + dilation_height_factor); for (int out_x_buffer_start = 0; out_x_buffer_start < output_width; out_x_buffer_start += kOutputPixelsInAccBuffer) { const int out_x_buffer_end = std::min( @@ -1032,9 +1041,9 @@ inline void DepthwiseConv( // Accumulation loop. Most of the time should be spent in here. for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { - const int in_y = in_y_origin + filter_y; + const int in_y = in_y_origin + dilation_height_factor * filter_y; row_accum_func( - stride_width, input_depth, input_width, + stride_width, dilation_width_factor, input_depth, input_width, input_data + in_y * input_height_stride + b * input_batch_stride, pad_width, depth_multiplier, filter_width, filter_data + filter_y * filter_height_stride, out_x_buffer_start, @@ -1096,11 +1105,6 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, float output_activation_min, float output_activation_max, float* output_data, const Dims<4>& output_dims) { - // TODO(suharshs): Optimized implementation of dilation depthwise conv need to - // be implemented. - TFLITE_DCHECK_EQ(dilation_width_factor, 1); - TFLITE_DCHECK_EQ(dilation_height_factor, 1); - tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. op_params.padding_type = PaddingType::kSame; diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index ccb9d1654f..494cf70504 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1466,11 +1466,14 @@ struct QuantizedDepthwiseConvKernel { // Accumulates the effect of one row of the filter, on a segment of one row // of the output, accessing the corresponding one row of the input. 
template -void QuantizedDepthwiseConvAccumRow( - int stride, int input_depth, int input_width, const uint8* input_data, - int16 input_offset, int pad_width, int depth_multiplier, int filter_width, - const uint8* filter_data, int16 filter_offset, int out_x_buffer_start, - int out_x_buffer_end, int output_depth, int32* acc_buffer) { +void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor, + int input_depth, int input_width, + const uint8* input_data, int16 input_offset, + int pad_width, int depth_multiplier, + int filter_width, const uint8* filter_data, + int16 filter_offset, int out_x_buffer_start, + int out_x_buffer_end, int output_depth, + int32* acc_buffer) { #ifdef GEMMLOWP_PROFILING gemmlowp::ScopedProfilingLabel label(__PRETTY_FUNCTION__); #endif @@ -1537,10 +1540,11 @@ void QuantizedDepthwiseConvAccumRow( // generic fallback of DepthwiseConvAccumRow, portable, non-templatized. inline void QuantizedDepthwiseConvAccumRowGeneric( - int stride, int input_depth, int input_width, const uint8* input_data, - int16 input_offset, int pad_width, int depth_multiplier, int filter_width, - const uint8* filter_data, int16 filter_offset, int out_x_buffer_start, - int out_x_buffer_end, int output_depth, int32* acc_buffer) { + int stride, int dilation_factor, int input_depth, int input_width, + const uint8* input_data, int16 input_offset, int pad_width, + int depth_multiplier, int filter_width, const uint8* filter_data, + int16 filter_offset, int out_x_buffer_start, int out_x_buffer_end, + int output_depth, int32* acc_buffer) { gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); #ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK @@ -1562,6 +1566,7 @@ inline void QuantizedDepthwiseConvAccumRowGeneric( << "* stride = " << stride << "\n" << "* input_depth = " << input_depth << "\n" << "* depth_multiplier = " << depth_multiplier << "\n" + << "* dilation_factor = " << dilation_factor << 
"\n" << "*\n" << "* Please do not hesitate to contact benoitjacob@ with this\n" << "* information.\n" @@ -1571,14 +1576,17 @@ inline void QuantizedDepthwiseConvAccumRowGeneric( const uint8* filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int out_x_loop_start = std::max( - out_x_buffer_start, (pad_width - filter_x + stride - 1) / stride); - const int out_x_loop_end = - std::min(out_x_buffer_end, - (pad_width + input_width - filter_x + stride - 1) / stride); + out_x_buffer_start, + (pad_width - dilation_factor * filter_x + stride - 1) / stride); + const int out_x_loop_end = std::min( + out_x_buffer_end, + (pad_width + input_width - dilation_factor * filter_x + stride - 1) / + stride); int32* acc_buffer_ptr = acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth; - const int in_x_origin = (out_x_loop_start * stride) - pad_width + filter_x; + const int in_x_origin = + (out_x_loop_start * stride) - pad_width + dilation_factor * filter_x; const uint8* input_ptr = input_data + in_x_origin * input_depth; const int input_ptr_increment = (stride - 1) * input_depth; for (int out_x = out_x_loop_start; out_x < out_x_loop_end; out_x++) { @@ -1688,15 +1696,11 @@ inline void DepthwiseConv( const int32 output_offset = params.output_offset; const int32 output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - // TODO(suharshs): Optimized implementation of dilation depthwise conv need to - // be implemented. 
- TFLITE_DCHECK_EQ(params.dilation_width_factor, 1); - TFLITE_DCHECK_EQ(params.dilation_height_factor, 1); - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); const int batches = MatchingDim(input_shape, 0, output_shape, 0); const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); @@ -1714,14 +1718,18 @@ inline void DepthwiseConv( TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + const bool has_dilation = + (dilation_width_factor != 1) || (dilation_height_factor != 1); + // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. - if (Fast3x3FilterKernelSupported( - input_shape, filter_shape, stride_width, stride_height, pad_width, - pad_height, depth_multiplier, output_shape, output_shift)) { + if (Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width, + stride_height, has_dilation, pad_width, + pad_height, depth_multiplier, output_shape, + output_shift)) { DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data); @@ -1748,7 +1756,7 @@ inline void DepthwiseConv( FIXED_DEPTH_MULTIPLIER) \ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ - depth_multiplier == FIXED_DEPTH_MULTIPLIER) { \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) { \ row_accum_func = \ QuantizedDepthwiseConvAccumRow; \ @@ -1808,9 +1816,13 @@ inline void DepthwiseConv( for (int b = 0; b < batches; ++b) { for (int out_y = 0; out_y < output_height; ++out_y) { const int in_y_origin = (out_y * stride_height) - pad_height; - const int filter_y_start = std::max(0, -in_y_origin); + const int 
filter_y_start = + std::max(0, (-in_y_origin + dilation_height_factor - 1) / + dilation_height_factor); const int filter_y_end = - std::min(filter_height, input_height - in_y_origin); + std::min(filter_height, + (input_height - in_y_origin + dilation_height_factor - 1) / + dilation_height_factor); for (int out_x_buffer_start = 0; out_x_buffer_start < output_width; out_x_buffer_start += kOutputPixelsInAccBuffer) { const int out_x_buffer_end = std::min( @@ -1826,9 +1838,9 @@ inline void DepthwiseConv( // Accumulation loop. Most of the time should be spent in here. for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { - const int in_y = in_y_origin + filter_y; + const int in_y = in_y_origin + dilation_height_factor * filter_y; row_accum_func( - stride_width, input_depth, input_width, + stride_width, dilation_width_factor, input_depth, input_width, input_data + in_y * input_height_stride + b * input_batch_stride, input_offset, pad_width, depth_multiplier, filter_width, filter_data + filter_y * filter_height_stride, filter_offset, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 9fed53cafb..5087227182 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -3176,8 +3176,8 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, inline bool Fast3x3FilterKernelSupported( const RuntimeShape& input_shape, const RuntimeShape& filter_shape, - int32 stride_width, int32 stride_height, int32 pad_width, int32 pad_height, - int32 depth_multiplier, const RuntimeShape& output_shape, + int32 stride_width, int32 stride_height, bool has_dilation, int32 pad_width, + int32 pad_height, int32 depth_multiplier, const RuntimeShape& output_shape, int32 output_shift) { const int32 input_height 
= input_shape.Dims(1); const int32 input_width = input_shape.Dims(2); @@ -3193,7 +3193,7 @@ inline bool Fast3x3FilterKernelSupported( (stride_height == 1 || stride_height == 2) && (stride_width == stride_height) && (pad_width == 0 || pad_width == 1) && (pad_height == 0 || pad_height == 1) && (pad_width == pad_height) && - (input_depth % 8) == 0 && (output_shift > 0); + (input_depth % 8) == 0 && (output_shift > 0) && !has_dilation; if (!supported) { return false; diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.cc b/tensorflow/contrib/lite/kernels/internal/test_util.cc index 9b1fd9b344..5ae4b193d0 100644 --- a/tensorflow/contrib/lite/kernels/internal/test_util.cc +++ b/tensorflow/contrib/lite/kernels/internal/test_util.cc @@ -43,17 +43,21 @@ Dims<4> MakeDimsForInference(int depth, int width, int height, int batch) { // this is a copied from an internal function in propagate_fixed_sizes.cc bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, - int filter_height, int stride, PaddingType padding_type, + int filter_height, int stride, int dilation_width_factor, + int dilation_height_factor, PaddingType padding_type, Dims<4>* output_dims, int* pad_width, int* pad_height) { const int input_width = ArraySize(input_dims, 1); const int input_height = ArraySize(input_dims, 2); const int batch = ArraySize(input_dims, 3); + int dilated_filter_width = dilation_width_factor * (filter_width - 1) + 1; + int dilated_filter_height = dilation_height_factor * (filter_height - 1) + 1; + int output_height = 0; int output_width = 0; if (padding_type == PaddingType::kValid) { - output_height = (input_height + stride - filter_height) / stride; - output_width = (input_width + stride - filter_width) / stride; + output_height = (input_height + stride - dilated_filter_height) / stride; + output_width = (input_width + stride - dilated_filter_width) / stride; } else if (padding_type == PaddingType::kSame) { output_height = (input_height + stride - 1) / 
stride; output_width = (input_width + stride - 1) / stride; @@ -65,9 +69,13 @@ bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, return false; } - *pad_height = - ((output_height - 1) * stride + filter_height - input_height) / 2; - *pad_width = ((output_width - 1) * stride + filter_width - input_width) / 2; + *pad_height = std::max( + 0, ((output_height - 1) * stride + dilated_filter_height - input_height) / + 2); + *pad_width = std::max( + 0, + ((output_width - 1) * stride + dilated_filter_width - input_width) / 2); + *output_dims = MakeDimsForInference(output_depth, output_width, output_height, batch); return true; diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.h b/tensorflow/contrib/lite/kernels/internal/test_util.h index 26078cef49..cb6d8b147c 100644 --- a/tensorflow/contrib/lite/kernels/internal/test_util.h +++ b/tensorflow/contrib/lite/kernels/internal/test_util.h @@ -31,7 +31,8 @@ Dims<4> MakeDimsForInference(int depth, int width, int height, int batch); // Computes output and padding dimensions. bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, - int filter_height, int stride, PaddingType padding_type, + int filter_height, int stride, int dilation_width_factor, + int dilation_height_factor, PaddingType padding_type, Dims<4>* output_dims, int* pad_width, int* pad_height); // Returns a mt19937 random engine. -- GitLab From d5f4c3aa59aebc88f42a186a30ef6200857194ca Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Mon, 17 Sep 2018 15:46:30 -0700 Subject: [PATCH 0292/1357] Remove tensorflow/contrib/linalg library. linalg remains in core. 
PiperOrigin-RevId: 213352573 --- CODEOWNERS | 1 - tensorflow/contrib/BUILD | 1 - tensorflow/contrib/__init__.py | 1 - tensorflow/contrib/cmake/python_modules.txt | 3 - tensorflow/contrib/cmake/tf_tests.cmake | 1 - tensorflow/contrib/distributions/BUILD | 54 ++- tensorflow/contrib/linalg/BUILD | 44 -- tensorflow/contrib/linalg/__init__.py | 58 --- tensorflow/contrib/linalg/python/__init__.py | 19 - .../linear_operator_addition_test.py | 412 ----------------- .../python/ops/linear_operator_addition.py | 432 ------------------ 11 files changed, 26 insertions(+), 1000 deletions(-) delete mode 100644 tensorflow/contrib/linalg/BUILD delete mode 100644 tensorflow/contrib/linalg/__init__.py delete mode 100644 tensorflow/contrib/linalg/python/__init__.py delete mode 100644 tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py delete mode 100644 tensorflow/contrib/linalg/python/ops/linear_operator_addition.py diff --git a/CODEOWNERS b/CODEOWNERS index b612bccffb..94cc865479 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -41,7 +41,6 @@ /tensorflow/contrib/labeled_tensor/ @shoyer /tensorflow/contrib/layers/ @fchollet @martinwicke /tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp -/tensorflow/contrib/linalg/ @langmore /tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis /tensorflow/contrib/lookup/ @ysuematsu @andreasst /tensorflow/contrib/losses/ @alextp @ispirmustafa diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index d98a24994c..e1af52cd96 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -60,7 +60,6 @@ py_library( "//tensorflow/contrib/learn", "//tensorflow/contrib/legacy_seq2seq:seq2seq_py", "//tensorflow/contrib/libsvm", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", "//tensorflow/contrib/lite/python:lite", diff --git a/tensorflow/contrib/__init__.py 
b/tensorflow/contrib/__init__.py index 9478e42b46..e71b0e0ae3 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -63,7 +63,6 @@ from tensorflow.contrib import labeled_tensor from tensorflow.contrib import layers from tensorflow.contrib import learn from tensorflow.contrib import legacy_seq2seq -from tensorflow.contrib import linalg from tensorflow.contrib import linear_optimizer from tensorflow.contrib import lookup from tensorflow.contrib import losses diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index fb871acae9..1c432b6e0b 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -273,9 +273,6 @@ tensorflow/contrib/libsvm tensorflow/contrib/libsvm/python tensorflow/contrib/libsvm/python/kernel_tests tensorflow/contrib/libsvm/python/ops -tensorflow/contrib/linalg -tensorflow/contrib/linalg/python -tensorflow/contrib/linalg/python/ops tensorflow/contrib/linear_optimizer tensorflow/contrib/linear_optimizer/kernels tensorflow/contrib/linear_optimizer/kernels/g3doc diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 2c878c1716..ed31351d9e 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -183,7 +183,6 @@ if (tensorflow_BUILD_PYTHON_TESTS) file(GLOB_RECURSE tf_test_src_py ${tf_test_src_py} "${tensorflow_source_dir}/tensorflow/contrib/legacy_seq2seq/*_test.py" - "${tensorflow_source_dir}/tensorflow/contrib/linalg/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/graph_editor/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/bayesflow/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/framework/*_test.py" diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 9aadc634da..3ff7da4f89 100644 --- a/tensorflow/contrib/distributions/BUILD +++ 
b/tensorflow/contrib/distributions/BUILD @@ -25,7 +25,6 @@ py_library( "`tf.contrib.distributions` to `tfp.distributions`."), srcs_version = "PY2AND3", deps = [ - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:clip_ops", @@ -61,7 +60,6 @@ py_library( ":bijectors_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/learn", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", "//tensorflow/python:control_flow_ops", @@ -706,8 +704,8 @@ cuda_py_test( ":bijectors_py", ":distributions_py", "//third_party/py/numpy", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", @@ -722,8 +720,8 @@ cuda_py_test( additional_deps = [ ":distributions_py", "//third_party/py/numpy", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:client_testlib", + "//tensorflow/python/ops/linalg", ], shard_count = 4, tags = ["noasan"], # times out, http://b/78588814 @@ -739,8 +737,8 @@ cuda_py_test( additional_deps = [ ":distributions_py", "//third_party/py/numpy", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", @@ -794,8 +792,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -831,8 +829,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - 
"//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -852,8 +850,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -871,8 +869,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -907,8 +905,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -926,10 +924,10 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python/ops/linalg", "//tensorflow/python:framework_test_lib", "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", @@ -945,8 +943,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", 
"//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -964,8 +962,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -983,8 +981,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1002,8 +1000,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1021,8 +1019,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1040,8 +1038,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1075,8 +1073,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - 
"//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1126,8 +1124,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1161,8 +1159,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1180,8 +1178,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1201,8 +1199,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1221,8 +1219,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", 
"//tensorflow/python:framework_test_lib", @@ -1240,8 +1238,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1259,8 +1257,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1278,8 +1276,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1297,8 +1295,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", @@ -1316,8 +1314,8 @@ cuda_py_test( ":distributions_py", "//third_party/py/numpy", "@six_archive//:six", - "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/python:array_ops", + "//tensorflow/python/ops/linalg", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:framework_test_lib", diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD deleted file mode 100644 index 78b7970069..0000000000 --- a/tensorflow/contrib/linalg/BUILD +++ /dev/null @@ 
-1,44 +0,0 @@ -# Description: -# Contains classes that provide access to common method of a [batch] matrix, -# without the need to instantiate the matrix. -# This allows for exploitation of structure, as well as a generic interface -# suitable for iterative solvers. - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -package(default_visibility = ["//tensorflow:__subpackages__"]) - -load("//tensorflow:tensorflow.bzl", "cuda_py_test") - -py_library( - name = "linalg_py", - srcs = ["__init__.py"] + glob(["python/ops/*.py"]), - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:check_ops", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:util", - "//tensorflow/python/ops/linalg", - "@six_archive//:six", - ], -) - -cuda_py_test( - name = "linear_operator_addition_test", - size = "small", - srcs = ["python/kernel_tests/linear_operator_addition_test.py"], - additional_deps = [ - ":linalg_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) diff --git a/tensorflow/contrib/linalg/__init__.py b/tensorflow/contrib/linalg/__init__.py deleted file mode 100644 index cbe4c03e4d..0000000000 --- a/tensorflow/contrib/linalg/__init__.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Linear algebra libraries. - -See the[Contrib Linalg](https://tensorflow.org/api_guides/python/contrib.linalg) -guide. - -@@LinearOperator -@@LinearOperatorBlockDiag -@@LinearOperatorCirculant -@@LinearOperatorCirculant2D -@@LinearOperatorCirculant3D -@@LinearOperatorDiag -@@LinearOperatorIdentity -@@LinearOperatorScaledIdentity -@@LinearOperatorFullMatrix -@@LinearOperatorKronecker -@@LinearOperatorLowerTriangular -@@LinearOperatorLowRankUpdate -@@LinearOperatorComposition -@@add_operators - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member - -from tensorflow.contrib.linalg.python.ops.linear_operator_addition import * -from tensorflow.python.ops.linalg.linear_operator import * -from tensorflow.python.ops.linalg.linear_operator_block_diag import * -from tensorflow.python.ops.linalg.linear_operator_circulant import * -from tensorflow.python.ops.linalg.linear_operator_composition import * -from tensorflow.python.ops.linalg.linear_operator_diag import * -from tensorflow.python.ops.linalg.linear_operator_full_matrix import * -from tensorflow.python.ops.linalg.linear_operator_identity import * -from tensorflow.python.ops.linalg.linear_operator_kronecker import * -from tensorflow.python.ops.linalg.linear_operator_low_rank_update import * -from tensorflow.python.ops.linalg.linear_operator_lower_triangular import * - -# pylint: 
enable=unused-import,wildcard-import,line-too-long,g-importing-member - -from tensorflow.python.util.all_util import remove_undocumented - -remove_undocumented(__name__) diff --git a/tensorflow/contrib/linalg/python/__init__.py b/tensorflow/contrib/linalg/python/__init__.py deleted file mode 100644 index c5ca3a623f..0000000000 --- a/tensorflow/contrib/linalg/python/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""ops module.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py deleted file mode 100644 index d94ac73654..0000000000 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py +++ /dev/null @@ -1,412 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.linalg.python.ops import linear_operator_addition -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops.linalg import linalg as linalg_lib -from tensorflow.python.platform import test - -linalg = linalg_lib -random_seed.set_random_seed(23) -rng = np.random.RandomState(0) - -add_operators = linear_operator_addition.add_operators - - -# pylint: disable=unused-argument -class _BadAdder(linear_operator_addition._Adder): - """Adder that will fail if used.""" - - def can_add(self, op1, op2): - raise AssertionError("BadAdder.can_add called!") - - def _add(self, op1, op2, operator_name, hints): - raise AssertionError("This line should not be reached") - - -# pylint: enable=unused-argument - - -class LinearOperatorAdditionCorrectnessTest(test.TestCase): - """Tests correctness of addition with combinations of a few Adders. - - Tests here are done with the _DEFAULT_ADDITION_TIERS, which means - add_operators should reduce all operators resulting in one single operator. - - This shows that we are able to correctly combine adders using the tiered - system. All Adders should be tested separately, and there is no need to test - every Adder within this class. 
- """ - - def test_one_operator_is_returned_unchanged(self): - op_a = linalg.LinearOperatorDiag([1., 1.]) - op_sum = add_operators([op_a]) - self.assertEqual(1, len(op_sum)) - self.assertTrue(op_sum[0] is op_a) - - def test_at_least_one_operators_required(self): - with self.assertRaisesRegexp(ValueError, "must contain at least one"): - add_operators([]) - - def test_attempting_to_add_numbers_raises(self): - with self.assertRaisesRegexp(TypeError, "contain only LinearOperator"): - add_operators([1, 2]) - - def test_two_diag_operators(self): - op_a = linalg.LinearOperatorDiag( - [1., 1.], is_positive_definite=True, name="A") - op_b = linalg.LinearOperatorDiag( - [2., 2.], is_positive_definite=True, name="B") - with self.cached_session(): - op_sum = add_operators([op_a, op_b]) - self.assertEqual(1, len(op_sum)) - op = op_sum[0] - self.assertTrue(isinstance(op, linalg_lib.LinearOperatorDiag)) - self.assertAllClose([[3., 0.], [0., 3.]], op.to_dense().eval()) - # Adding positive definite operators produces positive def. - self.assertTrue(op.is_positive_definite) - # Real diagonal ==> self-adjoint. - self.assertTrue(op.is_self_adjoint) - # Positive definite ==> non-singular - self.assertTrue(op.is_non_singular) - # Enforce particular name for this simple case - self.assertEqual("Add/B__A/", op.name) - - def test_three_diag_operators(self): - op1 = linalg.LinearOperatorDiag( - [1., 1.], is_positive_definite=True, name="op1") - op2 = linalg.LinearOperatorDiag( - [2., 2.], is_positive_definite=True, name="op2") - op3 = linalg.LinearOperatorDiag( - [3., 3.], is_positive_definite=True, name="op3") - with self.cached_session(): - op_sum = add_operators([op1, op2, op3]) - self.assertEqual(1, len(op_sum)) - op = op_sum[0] - self.assertTrue(isinstance(op, linalg_lib.LinearOperatorDiag)) - self.assertAllClose([[6., 0.], [0., 6.]], op.to_dense().eval()) - # Adding positive definite operators produces positive def. 
- self.assertTrue(op.is_positive_definite) - # Real diagonal ==> self-adjoint. - self.assertTrue(op.is_self_adjoint) - # Positive definite ==> non-singular - self.assertTrue(op.is_non_singular) - - def test_diag_tril_diag(self): - op1 = linalg.LinearOperatorDiag( - [1., 1.], is_non_singular=True, name="diag_a") - op2 = linalg.LinearOperatorLowerTriangular( - [[2., 0.], [0., 2.]], - is_self_adjoint=True, - is_non_singular=True, - name="tril") - op3 = linalg.LinearOperatorDiag( - [3., 3.], is_non_singular=True, name="diag_b") - with self.cached_session(): - op_sum = add_operators([op1, op2, op3]) - self.assertEqual(1, len(op_sum)) - op = op_sum[0] - self.assertTrue(isinstance(op, linalg_lib.LinearOperatorLowerTriangular)) - self.assertAllClose([[6., 0.], [0., 6.]], op.to_dense().eval()) - - # The diag operators will be self-adjoint (because real and diagonal). - # The TriL operator has the self-adjoint hint set. - self.assertTrue(op.is_self_adjoint) - - # Even though op1/2/3 are non-singular, this does not imply op is. - # Since no custom hint was provided, we default to None (unknown). 
- self.assertEqual(None, op.is_non_singular) - - def test_matrix_diag_tril_diag_uses_custom_name(self): - op0 = linalg.LinearOperatorFullMatrix( - [[-1., -1.], [-1., -1.]], name="matrix") - op1 = linalg.LinearOperatorDiag([1., 1.], name="diag_a") - op2 = linalg.LinearOperatorLowerTriangular( - [[2., 0.], [1.5, 2.]], name="tril") - op3 = linalg.LinearOperatorDiag([3., 3.], name="diag_b") - with self.cached_session(): - op_sum = add_operators([op0, op1, op2, op3], operator_name="my_operator") - self.assertEqual(1, len(op_sum)) - op = op_sum[0] - self.assertTrue(isinstance(op, linalg_lib.LinearOperatorFullMatrix)) - self.assertAllClose([[5., -1.], [0.5, 5.]], op.to_dense().eval()) - self.assertEqual("my_operator", op.name) - - def test_incompatible_domain_dimensions_raises(self): - op1 = linalg.LinearOperatorFullMatrix(rng.rand(2, 3)) - op2 = linalg.LinearOperatorDiag(rng.rand(2, 4)) - with self.assertRaisesRegexp(ValueError, "must.*same domain dimension"): - add_operators([op1, op2]) - - def test_incompatible_range_dimensions_raises(self): - op1 = linalg.LinearOperatorFullMatrix(rng.rand(2, 3)) - op2 = linalg.LinearOperatorDiag(rng.rand(3, 3)) - with self.assertRaisesRegexp(ValueError, "must.*same range dimension"): - add_operators([op1, op2]) - - def test_non_broadcastable_batch_shape_raises(self): - op1 = linalg.LinearOperatorFullMatrix(rng.rand(2, 3, 3)) - op2 = linalg.LinearOperatorDiag(rng.rand(4, 3, 3)) - with self.assertRaisesRegexp(ValueError, "Incompatible shapes"): - add_operators([op1, op2]) - - -class LinearOperatorOrderOfAdditionTest(test.TestCase): - """Test that the order of addition is done as specified by tiers.""" - - def test_tier_0_additions_done_in_tier_0(self): - diag1 = linalg.LinearOperatorDiag([1.]) - diag2 = linalg.LinearOperatorDiag([1.]) - diag3 = linalg.LinearOperatorDiag([1.]) - addition_tiers = [ - [linear_operator_addition._AddAndReturnDiag()], - [_BadAdder()], - ] - # Should not raise since all were added in tier 0, and tier 1 (with 
the - # _BadAdder) was never reached. - op_sum = add_operators([diag1, diag2, diag3], addition_tiers=addition_tiers) - self.assertEqual(1, len(op_sum)) - self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorDiag)) - - def test_tier_1_additions_done_by_tier_1(self): - diag1 = linalg.LinearOperatorDiag([1.]) - diag2 = linalg.LinearOperatorDiag([1.]) - tril = linalg.LinearOperatorLowerTriangular([[1.]]) - addition_tiers = [ - [linear_operator_addition._AddAndReturnDiag()], - [linear_operator_addition._AddAndReturnTriL()], - [_BadAdder()], - ] - # Should not raise since all were added by tier 1, and the - # _BadAdder) was never reached. - op_sum = add_operators([diag1, diag2, tril], addition_tiers=addition_tiers) - self.assertEqual(1, len(op_sum)) - self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorLowerTriangular)) - - def test_tier_1_additions_done_by_tier_1_with_order_flipped(self): - diag1 = linalg.LinearOperatorDiag([1.]) - diag2 = linalg.LinearOperatorDiag([1.]) - tril = linalg.LinearOperatorLowerTriangular([[1.]]) - addition_tiers = [ - [linear_operator_addition._AddAndReturnTriL()], - [linear_operator_addition._AddAndReturnDiag()], - [_BadAdder()], - ] - # Tier 0 could convert to TriL, and this converted everything to TriL, - # including the Diags. - # Tier 1 was never used. - # Tier 2 was never used (therefore, _BadAdder didn't raise). - op_sum = add_operators([diag1, diag2, tril], addition_tiers=addition_tiers) - self.assertEqual(1, len(op_sum)) - self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorLowerTriangular)) - - def test_cannot_add_everything_so_return_more_than_one_operator(self): - diag1 = linalg.LinearOperatorDiag([1.]) - diag2 = linalg.LinearOperatorDiag([2.]) - tril5 = linalg.LinearOperatorLowerTriangular([[5.]]) - addition_tiers = [ - [linear_operator_addition._AddAndReturnDiag()], - ] - # Tier 0 (the only tier) can only convert to Diag, so it combines the two - # diags, but the TriL is unchanged. 
- # Result should contain two operators, one Diag, one TriL. - op_sum = add_operators([diag1, diag2, tril5], addition_tiers=addition_tiers) - self.assertEqual(2, len(op_sum)) - found_diag = False - found_tril = False - with self.cached_session(): - for op in op_sum: - if isinstance(op, linalg.LinearOperatorDiag): - found_diag = True - self.assertAllClose([[3.]], op.to_dense().eval()) - if isinstance(op, linalg.LinearOperatorLowerTriangular): - found_tril = True - self.assertAllClose([[5.]], op.to_dense().eval()) - self.assertTrue(found_diag and found_tril) - - def test_intermediate_tier_is_not_skipped(self): - diag1 = linalg.LinearOperatorDiag([1.]) - diag2 = linalg.LinearOperatorDiag([1.]) - tril = linalg.LinearOperatorLowerTriangular([[1.]]) - addition_tiers = [ - [linear_operator_addition._AddAndReturnDiag()], - [_BadAdder()], - [linear_operator_addition._AddAndReturnTriL()], - ] - # tril cannot be added in tier 0, and the intermediate tier 1 with the - # BadAdder will catch it and raise. 
- with self.assertRaisesRegexp(AssertionError, "BadAdder.can_add called"): - add_operators([diag1, diag2, tril], addition_tiers=addition_tiers) - - -class AddAndReturnScaledIdentityTest(test.TestCase): - - def setUp(self): - self._adder = linear_operator_addition._AddAndReturnScaledIdentity() - - def test_identity_plus_identity(self): - id1 = linalg.LinearOperatorIdentity(num_rows=2) - id2 = linalg.LinearOperatorIdentity(num_rows=2, batch_shape=[3]) - hints = linear_operator_addition._Hints( - is_positive_definite=True, is_non_singular=True) - - self.assertTrue(self._adder.can_add(id1, id2)) - operator = self._adder.add(id1, id2, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity)) - - with self.cached_session(): - self.assertAllClose(2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) - self.assertTrue(operator.is_positive_definite) - self.assertTrue(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - def test_identity_plus_scaled_identity(self): - id1 = linalg.LinearOperatorIdentity(num_rows=2, batch_shape=[3]) - id2 = linalg.LinearOperatorScaledIdentity(num_rows=2, multiplier=2.2) - hints = linear_operator_addition._Hints( - is_positive_definite=True, is_non_singular=True) - - self.assertTrue(self._adder.can_add(id1, id2)) - operator = self._adder.add(id1, id2, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity)) - - with self.cached_session(): - self.assertAllClose(3.2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) - self.assertTrue(operator.is_positive_definite) - self.assertTrue(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - def test_scaled_identity_plus_scaled_identity(self): - id1 = linalg.LinearOperatorScaledIdentity( - num_rows=2, multiplier=[2.2, 2.2, 2.2]) - id2 = linalg.LinearOperatorScaledIdentity(num_rows=2, multiplier=-1.0) - 
hints = linear_operator_addition._Hints( - is_positive_definite=True, is_non_singular=True) - - self.assertTrue(self._adder.can_add(id1, id2)) - operator = self._adder.add(id1, id2, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorScaledIdentity)) - - with self.cached_session(): - self.assertAllClose(1.2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) - self.assertTrue(operator.is_positive_definite) - self.assertTrue(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - -class AddAndReturnDiagTest(test.TestCase): - - def setUp(self): - self._adder = linear_operator_addition._AddAndReturnDiag() - - def test_identity_plus_identity_returns_diag(self): - id1 = linalg.LinearOperatorIdentity(num_rows=2) - id2 = linalg.LinearOperatorIdentity(num_rows=2, batch_shape=[3]) - hints = linear_operator_addition._Hints( - is_positive_definite=True, is_non_singular=True) - - self.assertTrue(self._adder.can_add(id1, id2)) - operator = self._adder.add(id1, id2, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag)) - - with self.cached_session(): - self.assertAllClose(2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) - self.assertTrue(operator.is_positive_definite) - self.assertTrue(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - def test_diag_plus_diag(self): - diag1 = rng.rand(2, 3, 4) - diag2 = rng.rand(4) - op1 = linalg.LinearOperatorDiag(diag1) - op2 = linalg.LinearOperatorDiag(diag2) - hints = linear_operator_addition._Hints( - is_positive_definite=True, is_non_singular=True) - - self.assertTrue(self._adder.can_add(op1, op2)) - operator = self._adder.add(op1, op2, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorDiag)) - - with self.cached_session(): - self.assertAllClose( - linalg.LinearOperatorDiag(diag1 + diag2).to_dense().eval(), - 
operator.to_dense().eval()) - self.assertTrue(operator.is_positive_definite) - self.assertTrue(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - -class AddAndReturnTriLTest(test.TestCase): - - def setUp(self): - self._adder = linear_operator_addition._AddAndReturnTriL() - - def test_diag_plus_tril(self): - diag = linalg.LinearOperatorDiag([1., 2.]) - tril = linalg.LinearOperatorLowerTriangular([[10., 0.], [30., 0.]]) - hints = linear_operator_addition._Hints( - is_positive_definite=True, is_non_singular=True) - - self.assertTrue(self._adder.can_add(diag, diag)) - self.assertTrue(self._adder.can_add(diag, tril)) - operator = self._adder.add(diag, tril, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorLowerTriangular)) - - with self.cached_session(): - self.assertAllClose([[11., 0.], [30., 2.]], operator.to_dense().eval()) - self.assertTrue(operator.is_positive_definite) - self.assertTrue(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - -class AddAndReturnMatrixTest(test.TestCase): - - def setUp(self): - self._adder = linear_operator_addition._AddAndReturnMatrix() - - def test_diag_plus_diag(self): - diag1 = linalg.LinearOperatorDiag([1., 2.]) - diag2 = linalg.LinearOperatorDiag([-1., 3.]) - hints = linear_operator_addition._Hints( - is_positive_definite=False, is_non_singular=False) - - self.assertTrue(self._adder.can_add(diag1, diag2)) - operator = self._adder.add(diag1, diag2, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorFullMatrix)) - - with self.cached_session(): - self.assertAllClose([[0., 0.], [0., 5.]], operator.to_dense().eval()) - self.assertFalse(operator.is_positive_definite) - self.assertFalse(operator.is_non_singular) - self.assertEqual("my_operator", operator.name) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py 
b/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py deleted file mode 100644 index 86130a2c07..0000000000 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py +++ /dev/null @@ -1,432 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Add one or more `LinearOperators` efficiently.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc - -import six - -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops.linalg import linear_operator -from tensorflow.python.ops.linalg import linear_operator_diag -from tensorflow.python.ops.linalg import linear_operator_full_matrix -from tensorflow.python.ops.linalg import linear_operator_identity -from tensorflow.python.ops.linalg import linear_operator_lower_triangular - -__all__ = [] - - -def add_operators(operators, - operator_name=None, - addition_tiers=None, - name=None): - """Efficiently add one or more linear operators. 
- - Given operators `[A1, A2,...]`, this `Op` returns a possibly shorter list of - operators `[B1, B2,...]` such that - - ```sum_k Ak.matmul(x) = sum_k Bk.matmul(x).``` - - The operators `Bk` result by adding some of the `Ak`, as allowed by - `addition_tiers`. - - Example of efficient adding of diagonal operators. - - ```python - A1 = LinearOperatorDiag(diag=[1., 1.], name="A1") - A2 = LinearOperatorDiag(diag=[2., 2.], name="A2") - - # Use two tiers, the first contains an Adder that returns Diag. Since both - # A1 and A2 are Diag, they can use this Adder. The second tier will not be - # used. - addition_tiers = [ - [_AddAndReturnDiag()], - [_AddAndReturnMatrix()]] - B_list = add_operators([A1, A2], addition_tiers=addition_tiers) - - len(B_list) - ==> 1 - - B_list[0].__class__.__name__ - ==> 'LinearOperatorDiag' - - B_list[0].to_dense() - ==> [[3., 0.], - [0., 3.]] - - B_list[0].name - ==> 'Add/A1__A2/' - ``` - - Args: - operators: Iterable of `LinearOperator` objects with same `dtype`, domain - and range dimensions, and broadcastable batch shapes. - operator_name: String name for returned `LinearOperator`. Defaults to - concatenation of "Add/A__B/" that indicates the order of addition steps. - addition_tiers: List tiers, like `[tier_0, tier_1, ...]`, where `tier_i` - is a list of `Adder` objects. This function attempts to do all additions - in tier `i` before trying tier `i + 1`. - name: A name for this `Op`. Defaults to `add_operators`. - - Returns: - Subclass of `LinearOperator`. Class and order of addition may change as new - (and better) addition strategies emerge. - - Raises: - ValueError: If `operators` argument is empty. - ValueError: If shapes are incompatible. - """ - # Default setting - if addition_tiers is None: - addition_tiers = _DEFAULT_ADDITION_TIERS - - # Argument checking. 
- check_ops.assert_proper_iterable(operators) - operators = list(reversed(operators)) - if len(operators) < 1: - raise ValueError( - "Argument 'operators' must contain at least one operator. " - "Found: %s" % operators) - if not all( - isinstance(op, linear_operator.LinearOperator) for op in operators): - raise TypeError( - "Argument 'operators' must contain only LinearOperator instances. " - "Found: %s" % operators) - _static_check_for_same_dimensions(operators) - _static_check_for_broadcastable_batch_shape(operators) - - graph_parents = [] - for operator in operators: - graph_parents.extend(operator.graph_parents) - - with ops.name_scope(name or "add_operators", values=graph_parents): - - # Additions done in one of the tiers. Try tier 0, 1,... - ops_to_try_at_next_tier = list(operators) - for tier in addition_tiers: - ops_to_try_at_this_tier = ops_to_try_at_next_tier - ops_to_try_at_next_tier = [] - while ops_to_try_at_this_tier: - op1 = ops_to_try_at_this_tier.pop() - op2, adder = _pop_a_match_at_tier(op1, ops_to_try_at_this_tier, tier) - if op2 is not None: - # Will try to add the result of this again at this same tier. - new_operator = adder.add(op1, op2, operator_name) - ops_to_try_at_this_tier.append(new_operator) - else: - ops_to_try_at_next_tier.append(op1) - - return ops_to_try_at_next_tier - - -def _pop_a_match_at_tier(op1, operator_list, tier): - # Search from the back of list to the front in order to create nice default - # order of operations. - for i in range(1, len(operator_list) + 1): - op2 = operator_list[-i] - for adder in tier: - if adder.can_add(op1, op2): - return operator_list.pop(-i), adder - return None, None - - -def _infer_hints_allowing_override(op1, op2, hints): - """Infer hints from op1 and op2. hints argument is an override. - - Args: - op1: LinearOperator - op2: LinearOperator - hints: _Hints object holding "is_X" boolean hints to use for returned - operator. - If some hint is None, try to set using op1 and op2. 
If the - hint is provided, ignore op1 and op2 hints. This allows an override - of previous hints, but does not allow forbidden hints (e.g. you still - cannot say a real diagonal operator is not self-adjoint. - - Returns: - _Hints object. - """ - hints = hints or _Hints() - # If A, B are self-adjoint, then so is A + B. - if hints.is_self_adjoint is None: - is_self_adjoint = op1.is_self_adjoint and op2.is_self_adjoint - else: - is_self_adjoint = hints.is_self_adjoint - - # If A, B are positive definite, then so is A + B. - if hints.is_positive_definite is None: - is_positive_definite = op1.is_positive_definite and op2.is_positive_definite - else: - is_positive_definite = hints.is_positive_definite - - # A positive definite operator is always non-singular. - if is_positive_definite and hints.is_positive_definite is None: - is_non_singular = True - else: - is_non_singular = hints.is_non_singular - - return _Hints( - is_non_singular=is_non_singular, - is_self_adjoint=is_self_adjoint, - is_positive_definite=is_positive_definite) - - -def _static_check_for_same_dimensions(operators): - """ValueError if operators determined to have different dimensions.""" - if len(operators) < 2: - return - - domain_dimensions = [(op.name, op.domain_dimension.value) for op in operators - if op.domain_dimension.value is not None] - if len(set(value for name, value in domain_dimensions)) > 1: - raise ValueError("Operators must have the same domain dimension. Found: %s" - % domain_dimensions) - - range_dimensions = [(op.name, op.range_dimension.value) for op in operators - if op.range_dimension.value is not None] - if len(set(value for name, value in range_dimensions)) > 1: - raise ValueError("Operators must have the same range dimension. 
Found: %s" % - range_dimensions) - - -def _static_check_for_broadcastable_batch_shape(operators): - """ValueError if operators determined to have non-broadcastable shapes.""" - if len(operators) < 2: - return - - # This will fail if they cannot be broadcast together. - batch_shape = operators[0].batch_shape - for op in operators[1:]: - batch_shape = array_ops.broadcast_static_shape(batch_shape, op.batch_shape) - - -class _Hints(object): - """Holds 'is_X' flags that every LinearOperator is initialized with.""" - - def __init__(self, - is_non_singular=None, - is_positive_definite=None, - is_self_adjoint=None): - self.is_non_singular = is_non_singular - self.is_positive_definite = is_positive_definite - self.is_self_adjoint = is_self_adjoint - - -################################################################################ -# Classes to add two linear operators. -################################################################################ - - -@six.add_metaclass(abc.ABCMeta) -class _Adder(object): - """Abstract base class to add two operators. - - Each `Adder` acts independently, adding everything it can, paying no attention - as to whether another `Adder` could have done the addition more efficiently. - """ - - @property - def name(self): - return self.__class__.__name__ - - @abc.abstractmethod - def can_add(self, op1, op2): - """Returns `True` if this `Adder` can add `op1` and `op2`. Else `False`.""" - pass - - @abc.abstractmethod - def _add(self, op1, op2, operator_name, hints): - # Derived classes can assume op1 and op2 have been validated, e.g. they have - # the same dtype, and their domain/range dimensions match. - pass - - def add(self, op1, op2, operator_name, hints=None): - """Return new `LinearOperator` acting like `op1 + op2`. - - Args: - op1: `LinearOperator` - op2: `LinearOperator`, with `shape` and `dtype` such that adding to - `op1` is allowed. - operator_name: `String` name to give to returned `LinearOperator` - hints: `_Hints` object. 
Returned `LinearOperator` will be created with - these hints. - - Returns: - `LinearOperator` - """ - updated_hints = _infer_hints_allowing_override(op1, op2, hints) - - if operator_name is None: - operator_name = "Add/" + op1.name + "__" + op2.name + "/" - - values = op1.graph_parents + op2.graph_parents - scope_name = self.name - if scope_name.startswith("_"): - scope_name = scope_name[1:] - with ops.name_scope(scope_name, values=values): - return self._add(op1, op2, operator_name, updated_hints) - - -class _AddAndReturnScaledIdentity(_Adder): - """Handles additions resulting in an Identity family member. - - The Identity (`LinearOperatorScaledIdentity`, `LinearOperatorIdentity`) family - is closed under addition. This `Adder` respects that, and returns an Identity - """ - - def can_add(self, op1, op2): - types = {_type(op1), _type(op2)} - return not types.difference(_IDENTITY_FAMILY) - - def _add(self, op1, op2, operator_name, hints): - # Will build a LinearOperatorScaledIdentity. - - if _type(op1) == _SCALED_IDENTITY: - multiplier_1 = op1.multiplier - else: - multiplier_1 = array_ops.ones(op1.batch_shape_tensor(), dtype=op1.dtype) - - if _type(op2) == _SCALED_IDENTITY: - multiplier_2 = op2.multiplier - else: - multiplier_2 = array_ops.ones(op2.batch_shape_tensor(), dtype=op2.dtype) - - return linear_operator_identity.LinearOperatorScaledIdentity( - num_rows=op1.range_dimension_tensor(), - multiplier=multiplier_1 + multiplier_2, - is_non_singular=hints.is_non_singular, - is_self_adjoint=hints.is_self_adjoint, - is_positive_definite=hints.is_positive_definite, - name=operator_name) - - -class _AddAndReturnDiag(_Adder): - """Handles additions resulting in a Diag operator.""" - - def can_add(self, op1, op2): - types = {_type(op1), _type(op2)} - return not types.difference(_DIAG_LIKE) - - def _add(self, op1, op2, operator_name, hints): - return linear_operator_diag.LinearOperatorDiag( - diag=op1.diag_part() + op2.diag_part(), - is_non_singular=hints.is_non_singular, 
- is_self_adjoint=hints.is_self_adjoint, - is_positive_definite=hints.is_positive_definite, - name=operator_name) - - -class _AddAndReturnTriL(_Adder): - """Handles additions resulting in a TriL operator.""" - - def can_add(self, op1, op2): - types = {_type(op1), _type(op2)} - return not types.difference(_DIAG_LIKE.union({_TRIL})) - - def _add(self, op1, op2, operator_name, hints): - if _type(op1) in _EFFICIENT_ADD_TO_TENSOR: - op_add_to_tensor, op_other = op1, op2 - else: - op_add_to_tensor, op_other = op2, op1 - - return linear_operator_lower_triangular.LinearOperatorLowerTriangular( - tril=op_add_to_tensor.add_to_tensor(op_other.to_dense()), - is_non_singular=hints.is_non_singular, - is_self_adjoint=hints.is_self_adjoint, - is_positive_definite=hints.is_positive_definite, - name=operator_name) - - -class _AddAndReturnMatrix(_Adder): - """"Handles additions resulting in a `LinearOperatorFullMatrix`.""" - - def can_add(self, op1, op2): # pylint: disable=unused-argument - return isinstance(op1, linear_operator.LinearOperator) and isinstance( - op2, linear_operator.LinearOperator) - - def _add(self, op1, op2, operator_name, hints): - if _type(op1) in _EFFICIENT_ADD_TO_TENSOR: - op_add_to_tensor, op_other = op1, op2 - else: - op_add_to_tensor, op_other = op2, op1 - return linear_operator_full_matrix.LinearOperatorFullMatrix( - matrix=op_add_to_tensor.add_to_tensor(op_other.to_dense()), - is_non_singular=hints.is_non_singular, - is_self_adjoint=hints.is_self_adjoint, - is_positive_definite=hints.is_positive_definite, - name=operator_name) - - -################################################################################ -# Constants designating types of LinearOperators -################################################################################ - -# Type name constants for LinearOperator classes. -_IDENTITY = "identity" -_SCALED_IDENTITY = "scaled_identity" -_DIAG = "diag" -_TRIL = "tril" -_MATRIX = "matrix" - -# Groups of operators. 
-_DIAG_LIKE = {_DIAG, _IDENTITY, _SCALED_IDENTITY} -_IDENTITY_FAMILY = {_IDENTITY, _SCALED_IDENTITY} -# operators with an efficient .add_to_tensor() method. -_EFFICIENT_ADD_TO_TENSOR = _DIAG_LIKE - - -def _type(operator): - """Returns the type name constant (e.g. _TRIL) for operator.""" - if isinstance(operator, linear_operator_diag.LinearOperatorDiag): - return _DIAG - if isinstance(operator, - linear_operator_lower_triangular.LinearOperatorLowerTriangular): - return _TRIL - if isinstance(operator, linear_operator_full_matrix.LinearOperatorFullMatrix): - return _MATRIX - if isinstance(operator, linear_operator_identity.LinearOperatorIdentity): - return _IDENTITY - if isinstance(operator, - linear_operator_identity.LinearOperatorScaledIdentity): - return _SCALED_IDENTITY - raise TypeError("Operator type unknown: %s" % operator) - - -################################################################################ -# Addition tiers: -# We attempt to use Adders in tier K before K+1. -# -# Organize tiers to -# (i) reduce O(..) complexity of forming final operator, and -# (ii) produce the "most efficient" final operator. -# Dev notes: -# * Results of addition at tier K will be added at tier K or higher. -# * Tiers may change, and we warn the user that it may change. -################################################################################ - -# Note that the final tier, _AddAndReturnMatrix, will convert everything to a -# dense matrix. So it is sometimes very inefficient. -_DEFAULT_ADDITION_TIERS = [ - [_AddAndReturnScaledIdentity()], - [_AddAndReturnDiag()], - [_AddAndReturnTriL()], - [_AddAndReturnMatrix()], -] -- GitLab From 8ea4ea95ad1e85948019daee7a9e70e97082f6d0 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 17 Sep 2018 15:50:12 -0700 Subject: [PATCH 0293/1357] Fix GraphConstructor and import_graph_def bug with variadic ops. 
Prior to this change, GraphConstructor::PopulateMissingUnusedInputMapKey() didn't correctly compute the number of outputs for ops with variadic outputs. This meant that missing_unused_input_map_keys could contain spurious entries for unused variadic outputs, which could trigger a ValueError in import_graph_def. This also adds a new util method in node_def_util.h, NumOutputsForNode(). PiperOrigin-RevId: 213353158 --- tensorflow/core/framework/node_def_util.cc | 8 ++++++++ tensorflow/core/framework/node_def_util.h | 4 ++++ tensorflow/core/graph/graph_constructor.cc | 4 +++- tensorflow/core/graph/graph_constructor_test.cc | 9 ++++++++- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc index bacc1d72c4..42ec315a32 100644 --- a/tensorflow/core/framework/node_def_util.cc +++ b/tensorflow/core/framework/node_def_util.cc @@ -403,6 +403,14 @@ Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def, return OutputTypesForNode(node_def, op_def, outputs); } +Status NumOutputsForNode(const NodeDef& node_def, const OpDef& op_def, + int* num_outputs) { + DataTypeVector outputs; + TF_RETURN_IF_ERROR(OutputTypesForNode(node_def, op_def, &outputs)); + *num_outputs = outputs.size(); + return Status::OK(); +} + Status ValidateNodeDef(const NodeDef& node_def, const OpDef& op_def) { if (node_def.op() != op_def.name()) { return errors::InvalidArgument("NodeDef op '", node_def.op(), diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h index 499034cab2..7528d3d306 100644 --- a/tensorflow/core/framework/node_def_util.h +++ b/tensorflow/core/framework/node_def_util.h @@ -261,6 +261,10 @@ Status OutputTypesForNode(const NodeDef& node_def, const OpDef& op_def, // REQUIRES: ValidateOpDef(op_def).ok() Status InOutTypesForNode(const NodeDef& node_def, const OpDef& op_def, DataTypeVector* inputs, DataTypeVector* outputs); +// Computes the 
number of outputs for a specific node. +// REQUIRES: ValidateOpDef(op_def).ok() +Status NumOutputsForNode(const NodeDef& node_def, const OpDef& op_def, + int* num_outputs); // Validates that the NodeDef: // * Defines all expected attrs from the OpDef. diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 7399613f6a..eeb5c14eaa 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -1162,7 +1162,9 @@ Status GraphConstructor::PopulateMissingUnusedInputMapKeys() { const NodeDef* node_def = node_defs_[pair->second.gdef_index]; const OpDef* op_def; TF_RETURN_IF_ERROR(g_->op_registry()->LookUpOpDef(node_def->op(), &op_def)); - if (key.second >= op_def->output_arg_size()) { + int num_outputs; + TF_RETURN_IF_ERROR(NumOutputsForNode(*node_def, *op_def, &num_outputs)); + if (key.second >= num_outputs) { // key's index out of bounds missing_unused_input_map_keys_->push_back(key); } diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index 73142ebde7..3eef6bd2bd 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -199,6 +199,10 @@ REGISTER_OP("TestOneInputOneOutput") .Output("y: T") .Attr("T: {float, int64}") .SetShapeFn(shape_inference::UnchangedShape); +REGISTER_OP("TestVariadicOutput") + .Output("outputs: N * int32") + .Attr("N: int >= 0") + .SetShapeFn(shape_inference::UnknownShape); REGISTER_OP("TestDefaultAttr") .Attr("default_int: int=31415") .SetShapeFn(shape_inference::NoOutputs); @@ -1463,12 +1467,15 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapMissingUnusedKeys) { opts.input_map[TensorId("DNE", 0)] = TensorId("input", 0); // Unused but not missing opts.input_map[TensorId("t1", 0)] = TensorId("W1", 0); + // Unused but not missing + opts.input_map[TensorId("variadic", 4)] = TensorId("input", 0); ExpectOK( R"EOF( node { name: 'W2' 
op: 'TestParams' } node { name: 'new_input' op: 'TestInput' input: [ '^W2' ] } node { name: 't1' op: 'TestMul' input: [ 'new_input:0', 'new_input:1' ] } - node { name: 't2' op: 'TestMul' input: [ 't1:0', 't1:0' ] } + node { name: 'variadic' op: 'TestVariadicOutput' + attr { key: "N" value { i: 5 } } } )EOF", opts, &refiner, &results); -- GitLab From f5116dd366a5bb1d679e1682c13b8fa3c4830a84 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 15:56:43 -0700 Subject: [PATCH 0294/1357] Fixing the documentation of the parse_sequence_example function. PiperOrigin-RevId: 213354240 --- tensorflow/python/ops/parsing_ops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index bb8da3162a..b3e03a0135 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -981,9 +981,10 @@ def parse_sequence_example(serialized, name: A name for this operation (optional). Returns: - A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s. - The first dict contains the context key/values. - The second dict contains the feature_list key/values. + A tuple of three `dict`s, each mapping keys to `Tensor`s and + `SparseTensor`s. The first dict contains the context key/values, + the second dict contains the feature_list key/values, and the final dict + contains the lengths of any dense feature_list features. Raises: ValueError: if any feature is invalid. -- GitLab From 8ef1ece7d0ecdec633a22a8100fdae05cfbacb3e Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 17 Sep 2018 16:31:24 -0700 Subject: [PATCH 0295/1357] [tf.data] Introducing `tf.data.Dataset.window(size, shift, stride, drop_remainder)`, which can be used for combining elements of input dataset into "windows". 
A window is itself a finite dataset and, among other things, can be used for generalized batching (see https://github.com/tensorflow/community/pull/5 for details). PiperOrigin-RevId: 213360134 --- .../kernel_tests/window_dataset_op_test.py | 7 +- .../contrib/data/python/ops/grouping.py | 51 +-- tensorflow/contrib/data/python/ops/sliding.py | 4 + .../base_api/api_def_WindowDataset.pbtxt | 23 +- .../core/kernels/data/window_dataset_op.cc | 215 +++++++++++-- .../core/ops/compat/ops_history.v1.pbtxt | 14 +- tensorflow/core/ops/dataset_ops.cc | 10 +- tensorflow/python/data/kernel_tests/BUILD | 17 + .../kernel_tests/window_dataset_op_test.py | 295 ++++++++++++++++++ tensorflow/python/data/ops/dataset_ops.py | 93 +++++- .../golden/v1/tensorflow.data.-dataset.pbtxt | 4 + ...ow.data.-fixed-length-record-dataset.pbtxt | 4 + .../tensorflow.data.-t-f-record-dataset.pbtxt | 4 + .../tensorflow.data.-text-line-dataset.pbtxt | 4 + .../golden/v2/tensorflow.data.-dataset.pbtxt | 4 + ...ow.data.-fixed-length-record-dataset.pbtxt | 4 + .../tensorflow.data.-t-f-record-dataset.pbtxt | 4 + .../tensorflow.data.-text-line-dataset.pbtxt | 4 + 18 files changed, 679 insertions(+), 82 deletions(-) create mode 100644 tensorflow/python/data/kernel_tests/window_dataset_op_test.py diff --git a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py index 6eaa0b1959..8b7b3ac0f7 100644 --- a/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/window_dataset_op_test.py @@ -89,13 +89,14 @@ class WindowDatasetTest(test.TestCase, parameterized.TestCase): return dataset_ops.Dataset.zip( tuple([fn(*arg) if isinstance(arg, tuple) else arg for arg in args])) - dataset = self._structuredDataset(structure, shape, dtype).apply( + dataset = self._structuredDataset(structure, shape, dtype).repeat(5).apply( grouping.window_dataset(5)).flat_map(fn) get_next = 
dataset.make_one_shot_iterator().get_next() with self.cached_session() as sess: expected = sess.run(self._structuredElement(structure, shape, dtype)) - actual = sess.run(get_next) - self._assertEqual(expected, actual) + for _ in range(5): + actual = sess.run(get_next) + self._assertEqual(expected, actual) @parameterized.named_parameters( ("1", None, np.int32([]), dtypes.bool), diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 099e10db92..020167e4d1 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -255,6 +255,7 @@ def _map_x_dataset(map_func): return _apply_fn +# TODO(b/115382007) Remove this once canned reducers move to core. def window_dataset(window_size): """A transformation that creates window datasets from the input dataset. @@ -271,7 +272,12 @@ def window_dataset(window_size): """ def _apply_fn(dataset): - return _WindowDataset(dataset, window_size) + return dataset_ops.WindowDataset( + dataset, + size=window_size, + shift=window_size, + stride=1, + drop_remainder=False) return _apply_fn @@ -556,46 +562,3 @@ class _MapXDataset(dataset_ops.Dataset): @property def output_types(self): return self._output_types - - -class _WindowDataset(dataset_ops.Dataset): - """A dataset that creates window datasets from the input elements.""" - - def __init__(self, input_dataset, window_size): - """See `window_dataset()` for more details.""" - super(_WindowDataset, self).__init__() - self._input_dataset = input_dataset - self._window_size = ops.convert_to_tensor( - window_size, dtype=dtypes.int64, name="window_size") - self._output_classes = nest.pack_sequence_as( - input_dataset.output_classes, - [ - dataset_ops._NestedDatasetComponent( # pylint: disable=protected-access - output_classes=output_class, - output_shapes=output_shape, - output_types=output_type) - for output_class, output_shape, output_type in zip( - 
nest.flatten(input_dataset.output_classes), - nest.flatten(input_dataset.output_shapes), - nest.flatten(input_dataset.output_types)) - ]) - self._output_shapes = self._output_classes - self._output_types = self._output_classes - - def _as_variant_tensor(self): - return gen_dataset_ops.window_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._window_size, - **dataset_ops.flat_structure(self)) - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py index 8025dcdd16..b0d6a16c20 100644 --- a/tensorflow/contrib/data/python/ops/sliding.py +++ b/tensorflow/contrib/data/python/ops/sliding.py @@ -67,6 +67,10 @@ class _SlideDataset(dataset_ops.Dataset): @deprecation.deprecated_args( None, "stride is deprecated, use window_shift instead", "stride") +@deprecation.deprecated( + None, "Use `tf.data.Dataset.window(size=window_size, shift=window_shift, " + "stride=window_stride).flat_map(lambda x: x.batch(window.size))` " + "instead.") def sliding_window_batch(window_size, stride=None, window_shift=None, diff --git a/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt index 1bc3660479..01387b7527 100644 --- a/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_WindowDataset.pbtxt @@ -2,9 +2,30 @@ op { visibility: HIDDEN graph_op_name: "WindowDataset" in_arg { - name: "window_size" + name: "size" description: <(ctx, "window_size", &window_size)); + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "size", &window_size)); OP_REQUIRES( ctx, window_size > 0, errors::InvalidArgument("Window size must be greater than zero.")); - *output = new Dataset(ctx, 
window_size, input); + int64 window_shift = 0; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "shift", &window_shift)); + OP_REQUIRES( + ctx, window_shift > 0, + errors::InvalidArgument("Window shift must be greater than zero.")); + + int64 window_stride = 0; + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "stride", &window_stride)); + OP_REQUIRES( + ctx, window_stride > 0, + errors::InvalidArgument("Window stride must be greater than zero.")); + + bool drop_remainder; + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "drop_remainder", &drop_remainder)); + + *output = new Dataset(ctx, input, window_size, window_shift, window_stride, + drop_remainder); } private: class Dataset : public DatasetBase { public: - Dataset(OpKernelContext* ctx, int64 window_size, const DatasetBase* input) + Dataset(OpKernelContext* ctx, const DatasetBase* input, int64 window_size, + int64 window_shift, int64 window_stride, bool drop_remainder) : DatasetBase(DatasetContext(ctx)), + input_(input), window_size_(window_size), - input_(input) { + window_shift_(window_shift), + window_stride_(window_stride), + drop_remainder_(drop_remainder) { input_->Ref(); } @@ -72,7 +94,8 @@ class WindowDatasetOp : public UnaryDatasetOpKernel { } string DebugString() const override { - return strings::StrCat("WindowDatasetOp(", window_size_, ")::Dataset"); + return strings::StrCat("WindowDatasetOp(", window_size_, window_shift_, + window_stride_, drop_remainder_, ")::Dataset"); } protected: @@ -81,10 +104,19 @@ class WindowDatasetOp : public UnaryDatasetOpKernel { Node** output) const override { Node* input_graph_node = nullptr; TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); - Node* window_size = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size)); + Node* window_size_node = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size_node)); + Node* window_shift_node = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(window_shift_, &window_shift_node)); + 
Node* window_stride_node = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(window_stride_, &window_stride_node)); + Node* drop_remainder_node = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder_node)); TF_RETURN_IF_ERROR( - b->AddDataset(this, {input_graph_node, window_size}, output)); + b->AddDataset(this, + {input_graph_node, window_size_node, window_shift_node, + window_stride_node, drop_remainder_node}, + output)); return Status::OK(); } @@ -101,37 +133,79 @@ class WindowDatasetOp : public UnaryDatasetOpKernel { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { - // Each row of `window_elements` is a tuple of tensors from the - // input iterator. + const int64 window_size = dataset()->window_size_; + const int64 window_shift = dataset()->window_shift_; + const int64 window_stride = dataset()->window_stride_; std::vector> window_elements; + Status status = Status::OK(); { mutex_lock l(mu_); - if (!input_impl_) { + if (!input_impl_ && buffer_.empty()) { *end_of_sequence = true; return Status::OK(); } - window_elements.reserve(dataset()->window_size_); - *end_of_sequence = false; - for (int i = 0; i < dataset()->window_size_ && !*end_of_sequence; - ++i) { - std::vector window_element_tuple; - TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &window_element_tuple, - end_of_sequence)); - if (!*end_of_sequence) { - window_elements.emplace_back(std::move(window_element_tuple)); - } else { - input_impl_.reset(); + + // Add elements to the buffer. 
+ size_t target_size = TargetBufferSize(window_size, window_stride); + if (input_impl_) { + *end_of_sequence = false; + for (size_t i = buffer_.size(); + i < target_size && !*end_of_sequence; ++i) { + std::vector element; + Status status = + input_impl_->GetNext(ctx, &element, end_of_sequence); + if (!*end_of_sequence) { + buffer_.emplace_back(std::move(element), status); + } else { + input_impl_.reset(); + } } } + + // If there are not enough elements and `drop_remainder` is set, we do + // not wish to return a smaller window. + if (buffer_.empty() || + (dataset()->drop_remainder_ && buffer_.size() < target_size)) { + DCHECK(*end_of_sequence); + return Status::OK(); + } + + int num_elements = 1 + (buffer_.size() - 1) / window_stride; + window_elements.reserve(num_elements); + for (size_t i = 0; i < num_elements; ++i) { + status.Update(buffer_[window_stride * i].status); + if (!status.ok()) { + break; + } + window_elements.emplace_back(buffer_[window_stride * i].result); + } + + // Shift the window, discarding elements if necessary. + int buffer_size = buffer_.size(); + if (window_shift >= buffer_size) { + for (size_t i = buffer_size; input_impl_ && i < window_shift; ++i) { + bool end_of_input; + std::vector element; + // Ignore non-error status of discarded elements. + input_impl_->GetNext(ctx, &element, &end_of_input).IgnoreError(); + if (end_of_input) { + input_impl_.reset(); + } + } + buffer_.clear(); + } else { + buffer_.erase(buffer_.begin(), buffer_.begin() + window_shift); + } } - if (window_elements.empty()) { - DCHECK(*end_of_sequence); - return Status::OK(); + if (!status.ok()) { + return status; } + // Construct output tensors. 
const size_t num_tuple_components = window_elements[0].size(); const int64 num_window_elements = window_elements.size(); + *end_of_sequence = false; for (size_t idx = 0; idx < num_tuple_components; ++idx) { DatasetBase* window_dataset; std::vector> window_component_elements; @@ -154,7 +228,6 @@ class WindowDatasetOp : public UnaryDatasetOpKernel { TF_RETURN_IF_ERROR(StoreDatasetInVariantTensor(window_dataset, &out_tensors->back())); } - *end_of_sequence = false; return Status::OK(); } @@ -167,6 +240,20 @@ class WindowDatasetOp : public UnaryDatasetOpKernel { } else { TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_)); } + // Save buffer. + TF_RETURN_IF_ERROR(writer->WriteScalar(strings::StrCat("buffer_size"), + buffer_.size())); + for (int64 i = 0; i < buffer_.size(); i++) { + TF_RETURN_IF_ERROR(WriteStatusLocked(writer, i, buffer_[i].status)); + TF_RETURN_IF_ERROR( + writer->WriteScalar(strings::StrCat("buffer[", i, "].size"), + buffer_[i].result.size())); + for (int64 j = 0; j < buffer_[i].result.size(); j++) { + TF_RETURN_IF_ERROR( + writer->WriteTensor(strings::StrCat("buffer[", i, "][", j, "]"), + buffer_[i].result[j])); + } + } return Status::OK(); } @@ -178,22 +265,92 @@ class WindowDatasetOp : public UnaryDatasetOpKernel { } else { input_impl_.reset(); } + // Restore buffer. 
+ int64 buffer_size; + TF_RETURN_IF_ERROR( + reader->ReadScalar(strings::StrCat("buffer_size"), &buffer_size)); + buffer_.resize(buffer_size); + for (int64 i = 0; i < buffer_size; i++) { + int64 vector_size; + TF_RETURN_IF_ERROR(ReadStatusLocked(reader, i, &buffer_[i].status)); + TF_RETURN_IF_ERROR(reader->ReadScalar( + strings::StrCat("buffer[", i, "].size"), &vector_size)); + buffer_[i].result.resize(vector_size); + for (int64 j = 0; j < vector_size; j++) { + TF_RETURN_IF_ERROR( + reader->ReadTensor(strings::StrCat("buffer[", i, "][", j, "]"), + &buffer_[i].result[j])); + } + } return Status::OK(); } private: + struct InvocationResult { + InvocationResult() = default; + InvocationResult(std::vector&& result, const Status& status) + : result(result), status(status) {} + + std::vector result; + Status status; + }; + + Status WriteStatusLocked(IteratorStateWriter* writer, size_t index, + const Status& status) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + CodeKey(index), static_cast(status.code()))); + if (!status.ok()) { + TF_RETURN_IF_ERROR(writer->WriteScalar(ErrorMessageKey(index), + status.error_message())); + } + return Status::OK(); + } + + Status ReadStatusLocked(IteratorStateReader* reader, size_t index, + Status* status) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + int64 code_int; + TF_RETURN_IF_ERROR(reader->ReadScalar(CodeKey(index), &code_int)); + error::Code code = static_cast(code_int); + + if (code != error::Code::OK) { + string error_message; + TF_RETURN_IF_ERROR( + reader->ReadScalar(ErrorMessageKey(index), &error_message)); + *status = Status(code, error_message); + } else { + *status = Status::OK(); + } + return Status::OK(); + } + + string CodeKey(size_t index) { + return full_name(strings::StrCat("buffer[", index, "].code")); + } + + string ErrorMessageKey(size_t index) { + return full_name(strings::StrCat("buffer[", index, "].error_message")); + } + + size_t TargetBufferSize(int64 window_size, int64 window_stride) { + 
return (window_size - 1) * window_stride + 1; + } + mutex mu_; + std::deque buffer_ GUARDED_BY(mu_); std::unique_ptr input_impl_ GUARDED_BY(mu_); }; - const int64 window_size_; const DatasetBase* const input_; + const int64 window_size_; + const int64 window_shift_; + const int64 window_stride_; + const bool drop_remainder_; }; }; REGISTER_KERNEL_BUILDER(Name("WindowDataset").Device(DEVICE_CPU), WindowDatasetOp); - } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 57c6bda98b..e59958749c 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -75602,9 +75602,21 @@ op { type: DT_VARIANT } input_arg { - name: "window_size" + name: "size" + type: DT_INT64 + } + input_arg { + name: "shift" + type: DT_INT64 + } + input_arg { + name: "stride" type: DT_INT64 } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } output_arg { name: "handle" type: DT_VARIANT diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 7d9e7b2d3f..4d3f272c1b 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -396,14 +396,20 @@ REGISTER_OP("FilterByLastComponentDataset") REGISTER_OP("WindowDataset") .Input("input_dataset: variant") - .Input("window_size: int64") + .Input("size: int64") + .Input("shift: int64") + .Input("stride: int64") + .Input("drop_remainder: bool") .Output("handle: variant") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn([](shape_inference::InferenceContext* c) { shape_inference::ShapeHandle unused; - // batch_size should be a scalar. + // size, shift, stride, and drop_remainder should be scalars. 
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); return shape_inference::ScalarShape(c); }); diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index 631b87a718..17d4fec662 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -407,3 +407,20 @@ cuda_py_test( "//tensorflow/python:tensor_shape", ], ) + +tf_py_test( + name = "window_dataset_op_test", + size = "small", + srcs = ["window_dataset_op_test.py"], + additional_deps = [ + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python/data/ops:dataset_ops", + ], +) diff --git a/tensorflow/python/data/kernel_tests/window_dataset_op_test.py b/tensorflow/python/data/kernel_tests/window_dataset_op_test.py new file mode 100644 index 0000000000..fd4348426d --- /dev/null +++ b/tensorflow/python/data/kernel_tests/window_dataset_op_test.py @@ -0,0 +1,295 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class WindowDatasetTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ("1", 20, 14, 7, 1), + ("2", 20, 17, 9, 1), + ("3", 20, 14, 14, 1), + ("4", 20, 10, 14, 1), + ("5", 20, 14, 19, 1), + ("6", 20, 4, 1, 2), + ("7", 20, 2, 1, 6), + ("8", 20, 4, 7, 2), + ("9", 20, 2, 7, 6), + ("10", 1, 10, 4, 1), + ("11", 0, 10, 4, 1), + ("12", 20, 14, 7, 1, False), + ("13", 20, 17, 9, 1, False), + ("14", 20, 14, 14, 1, False), + ("15", 20, 10, 14, 1, False), + ("16", 20, 14, 19, 1, False), + ("17", 20, 4, 1, 2, False), + ("18", 20, 2, 1, 6, False), + ("19", 20, 4, 7, 2, False), + ("20", 20, 2, 7, 6, False), + ("21", 1, 10, 4, 1, False), + ("22", 0, 10, 4, 1, False), + ) + def testWindowDataset(self, count, size, shift, stride, drop_remainder=True): + """Tests a dataset that slides a window its input elements.""" + components = (np.arange(7), + np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], + np.array(37.0) * np.arange(7)) + + count_t = array_ops.placeholder(dtypes.int64, shape=[]) + size_t = array_ops.placeholder(dtypes.int64, shape=[]) + shift_t = array_ops.placeholder(dtypes.int64, shape=[]) + stride_t = array_ops.placeholder(dtypes.int64, shape=[]) + drop_remainder_t = array_ops.placeholder(dtypes.bool, shape=[]) + + def _map_fn(x, y, z): + return math_ops.square(x), math_ops.square(y), 
math_ops.square(z) + + def _flat_map_fn(x, y, z): + return dataset_ops.Dataset.zip((x.batch(batch_size=size_t), + y.batch(batch_size=size_t), + z.batch(batch_size=size_t))) + + iterator = dataset_ops.Dataset.from_tensor_slices(components).map( + _map_fn).repeat(count).window( + size=size_t, + shift=shift_t, + stride=stride_t, + drop_remainder=drop_remainder_t).flat_map( + _flat_map_fn).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + self.assertEqual([[None] + list(c.shape[1:]) for c in components], + [t.shape.as_list() for t in get_next]) + + with self.cached_session() as sess: + sess.run( + init_op, + feed_dict={ + count_t: count, + size_t: size, + shift_t: shift, + stride_t: stride, + drop_remainder_t: drop_remainder + }) + num_full_batches = max( + 0, (count * 7 - ((size - 1) * stride + 1)) // shift + 1) + for i in range(num_full_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + for j in range(size): + self.assertAllEqual(component[(i * shift + j * stride) % 7]**2, + result_component[j]) + if not drop_remainder: + num_partial_batches = (count * 7) // shift + ( + (count * 7) % shift > 0) - num_full_batches + for i in range(num_partial_batches): + result = sess.run(get_next) + for component, result_component in zip(components, result): + remaining = (count * 7) - ((num_full_batches + i) * shift) + num_elements = remaining // stride + ((remaining % stride) > 0) + for j in range(num_elements): + self.assertAllEqual( + component[((num_full_batches + i) * shift + j * stride) % 7] + **2, result_component[j]) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + @parameterized.named_parameters( + ("1", 14, 0, 3, 1), + ("2", 14, 3, 0, 1), + ("3", 14, 3, 3, 0), + ) + def testWindowDatasetInvalid(self, count, size, shift, stride): + count_t = array_ops.placeholder(dtypes.int64, shape=[]) + size_t = array_ops.placeholder(dtypes.int64, shape=[]) + 
shift_t = array_ops.placeholder(dtypes.int64, shape=[]) + stride_t = array_ops.placeholder(dtypes.int64, shape=[]) + + iterator = dataset_ops.Dataset.range(10).map(lambda x: x).repeat( + count_t).window( + size=size_t, shift=shift_t, + stride=stride_t).flat_map(lambda x: x.batch(batch_size=size_t) + ).make_initializable_iterator() + init_op = iterator.initializer + + with self.cached_session() as sess: + with self.assertRaises(errors.InvalidArgumentError): + sess.run( + init_op, + feed_dict={ + count_t: count, + size_t: size, + shift_t: shift, + stride_t: stride + }) + + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def testWindowSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).window( + size=5, shift=3, drop_remainder=True).flat_map( + lambda x: x.batch(batch_size=5)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.cached_session() as sess: + sess.run(init_op) + num_batches = (10 - 5) // 3 + 1 + for i in range(num_batches): + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], + values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4], + dense_shape=[5, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testWindowSparseWithDifferentDenseShapes(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=array_ops.expand_dims( + math_ops.range(i, dtype=dtypes.int64), 1), + values=array_ops.fill([math_ops.to_int32(i)], i), + dense_shape=[i]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).window( + 
size=5, shift=3, drop_remainder=True).flat_map( + lambda x: x.batch(batch_size=5)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.cached_session() as sess: + sess.run(init_op) + num_batches = (10 - 5) // 3 + 1 + for i in range(num_batches): + actual = sess.run(get_next) + expected_indices = [] + expected_values = [] + for j in range(5): + for k in range(i * 3 + j): + expected_indices.append([j, k]) + expected_values.append(i * 3 + j) + expected = sparse_tensor.SparseTensorValue( + indices=expected_indices, + values=expected_values, + dense_shape=[5, i * 3 + 5 - 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testNestedWindowSparse(self): + + def _sparse(i): + return sparse_tensor.SparseTensorValue( + indices=[[0]], values=(i * [1]), dense_shape=[1]) + + iterator = dataset_ops.Dataset.range(10).map(_sparse).window( + size=4, shift=2, + drop_remainder=True).flat_map(lambda x: x.batch(batch_size=4)).window( + size=3, shift=1, drop_remainder=True).flat_map( + lambda x: x.batch(batch_size=3)).make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.cached_session() as sess: + sess.run(init_op) + # Slide: 1st batch. + actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [1, 0, 0], + [1, 1, 0], [1, 2, 0], [1, 3, 0], [2, 0, 0], [2, 1, 0], + [2, 2, 0], [2, 3, 0]], + values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7], + dense_shape=[3, 4, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + # Slide: 2nd batch. 
+ actual = sess.run(get_next) + expected = sparse_tensor.SparseTensorValue( + indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], [1, 0, 0], + [1, 1, 0], [1, 2, 0], [1, 3, 0], [2, 0, 0], [2, 1, 0], + [2, 2, 0], [2, 3, 0]], + values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9], + dense_shape=[3, 4, 1]) + self.assertTrue(sparse_tensor.is_sparse(actual)) + self.assertSparseValuesEqual(actual, expected) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testWindowShapeError(self): + + def generator(): + yield [1.0, 2.0, 3.0] + yield [4.0, 5.0, 6.0] + yield [7.0, 8.0, 9.0, 10.0] + + iterator = dataset_ops.Dataset.from_generator( + generator, dtypes.float32, output_shapes=[None]).window( + size=3, shift=1).flat_map( + lambda x: x.batch(batch_size=3)).make_initializable_iterator() + next_element = iterator.get_next() + + with self.cached_session() as sess: + sess.run(iterator.initializer) + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r"Cannot batch tensors with different shapes in component 0. 
" + r"First element had shape \[3\] and element 2 had shape \[4\]."): + sess.run(next_element) + + def testWindowIgnoreErrors(self): + input_values = np.float32([1., np.nan, 2., np.nan, 3.]) + dataset = dataset_ops.Dataset.from_tensor_slices(input_values).map( + lambda x: array_ops.check_numerics(x, "message")).window( + size=2, shift=2, stride=2, + drop_remainder=True).flat_map(lambda x: x.batch(batch_size=2)) + get_next = dataset.make_one_shot_iterator().get_next() + + with self.cached_session() as sess: + self.assertAllEqual(np.float32([1., 2.]), sess.run(get_next)) + self.assertAllEqual(np.float32([2., 3.]), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index c985e00dd1..93b3a7b93b 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1115,7 +1115,7 @@ class Dataset(object): return FilterDataset(self, predicate) def apply(self, transformation_func): - """Apply a transformation function to this dataset. + """Applies a transformation function to this dataset. `apply` enables chaining of custom `Dataset` transformations, which are represented as functions that take one `Dataset` argument and return a @@ -1131,7 +1131,7 @@ class Dataset(object): Args: transformation_func: A function that takes one `Dataset` argument and - returns a `Dataset`. + returns a `Dataset`. Returns: Dataset: The `Dataset` returned by applying `transformation_func` to this @@ -1142,6 +1142,45 @@ class Dataset(object): raise TypeError("`transformation_func` must return a Dataset.") return dataset + def window(self, size, shift=None, stride=1, drop_remainder=False): + """Combines input elements into a dataset of windows. 
+ + Each window is a dataset itself and contains `size` elements (or + possibly fewer if there are not enough input elements to fill the window + and `drop_remainder` evaluates to false). + + The `stride` argument determines the stride of the input elements, + and the `shift` argument determines the shift of the window. + + For example: + - `tf.data.Dataset.range(7).window(2)` produces + `{{0, 1}, {2, 3}, {4, 5}, {6}}` + - `tf.data.Dataset.range(7).window(3, 2, 1, True)` produces + `{{0, 1, 2}, {2, 3, 4}, {4, 5, 6}}` + - `tf.data.Dataset.range(7).window(3, 1, 2, True)` produces + `{{0, 2, 4}, {1, 3, 5}, {2, 4, 6}}` + + Args: + size: A `tf.int64` scalar `tf.Tensor`, representing the number of elements + of the input dataset to combine into a window. + shift: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + forward shift of the sliding window in each iteration. Defaults to + `size`. + stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + stride of the input elements in the sliding window. + drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing + whether a window should be dropped in case its size is smaller than + `window_size`. + + Returns: + Dataset: A `Dataset` of windows, each of which is a nested `Dataset` with + the same structure as this dataset, but a finite subsequence of its + elements. + """ + if shift is None: + shift = size + return WindowDataset(self, size, shift, stride, drop_remainder) + class TensorDataset(Dataset): """A `Dataset` with a single element, viz. 
a nested structure of tensors.""" @@ -2442,3 +2481,53 @@ class PrefetchDataset(Dataset): @property def output_types(self): return self._input_dataset.output_types + + +class WindowDataset(Dataset): + """A dataset that creates window datasets from the input elements.""" + + def __init__(self, input_dataset, size, shift, stride, drop_remainder): + """See `window_dataset()` for more details.""" + super(WindowDataset, self).__init__() + self._input_dataset = input_dataset + self._size = ops.convert_to_tensor(size, dtype=dtypes.int64, name="size") + self._shift = ops.convert_to_tensor(shift, dtype=dtypes.int64, name="shift") + self._stride = ops.convert_to_tensor( + stride, dtype=dtypes.int64, name="stride") + self._drop_remainder = ops.convert_to_tensor( + drop_remainder, dtype=dtypes.bool, name="drop_remainder") + self._output_classes = nest.pack_sequence_as( + input_dataset.output_classes, + [ + _NestedDatasetComponent( # pylint: disable=protected-access + output_classes=output_class, + output_shapes=output_shape, + output_types=output_type) + for output_class, output_shape, output_type in zip( + nest.flatten(input_dataset.output_classes), + nest.flatten(input_dataset.output_shapes), + nest.flatten(input_dataset.output_types)) + ]) + self._output_shapes = self._output_classes + self._output_types = self._output_classes + + def _as_variant_tensor(self): + return gen_dataset_ops.window_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._size, + self._shift, + self._stride, + self._drop_remainder, + **flat_structure(self)) + + @property + def output_classes(self): + return self._output_classes + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt index 87745420ee..c3ba2dba57 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt @@ -110,6 +110,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt index 6dd46365b0..3541671bee 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -111,6 +111,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt index 35b7105eba..b113c18ee0 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt @@ -111,6 +111,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, 
defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt index 8ae370af98..7210bf5db4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt @@ -111,6 +111,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt index 87745420ee..c3ba2dba57 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt @@ -110,6 +110,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt index 6dd46365b0..3541671bee 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -111,6 +111,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt index 35b7105eba..b113c18ee0 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt @@ -111,6 +111,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: "args=[\'datasets\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt index 8ae370af98..7210bf5db4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt @@ -111,6 +111,10 @@ tf_class { name: "take" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "window" + argspec: "args=[\'self\', \'size\', \'shift\', \'stride\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'False\'], " + } member_method { name: "zip" argspec: 
"args=[\'datasets\'], varargs=None, keywords=None, defaults=None" -- GitLab From 0b80d098704c72f627f37bfeee0ae19788c06fa8 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Mon, 17 Sep 2018 16:32:12 -0700 Subject: [PATCH 0296/1357] Add basic op resolver registration to TFLite C API PiperOrigin-RevId: 213360279 --- tensorflow/contrib/lite/experimental/c/BUILD | 2 ++ .../contrib/lite/experimental/c/c_api.cc | 4 +++ .../contrib/lite/experimental/c/c_api.h | 3 +- .../lite/experimental/c/c_api_experimental.cc | 16 +++++++++ .../lite/experimental/c/c_api_experimental.h | 25 ++++++++++++++ .../experimental/c/c_api_experimental_test.cc | 23 ++++++++++--- .../lite/experimental/c/c_api_internal.h | 2 ++ .../contrib/lite/mutable_op_resolver.cc | 15 ++++++-- tensorflow/contrib/lite/mutable_op_resolver.h | 8 +++-- .../contrib/lite/mutable_op_resolver_test.cc | 34 +++++++++++++++++++ 10 files changed, 122 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/lite/experimental/c/BUILD b/tensorflow/contrib/lite/experimental/c/BUILD index ea4a543252..835fc2595e 100644 --- a/tensorflow/contrib/lite/experimental/c/BUILD +++ b/tensorflow/contrib/lite/experimental/c/BUILD @@ -68,6 +68,7 @@ cc_library( deps = [ ":c_api", ":c_api_internal", + "//tensorflow/contrib/lite:kernel_api", ], ) @@ -93,6 +94,7 @@ cc_test( deps = [ ":c_api", ":c_api_experimental", + "//tensorflow/contrib/lite:kernel_api", "//tensorflow/contrib/lite/testing:util", "@com_google_googletest//:gtest", ], diff --git a/tensorflow/contrib/lite/experimental/c/c_api.cc b/tensorflow/contrib/lite/experimental/c/c_api.cc index c589cf71ea..1c3996fb87 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api.cc @@ -62,7 +62,11 @@ TFL_Interpreter* TFL_NewInterpreter( return nullptr; } + // TODO(b/111881878): Allow use of C API without pulling in all builtin ops. 
tflite::ops::builtin::BuiltinOpResolver resolver; + if (optional_options) { + resolver.AddAll(optional_options->op_resolver); + } tflite::InterpreterBuilder builder(*model->impl, resolver); std::unique_ptr interpreter; if (builder(&interpreter) != kTfLiteOk) { diff --git a/tensorflow/contrib/lite/experimental/c/c_api.h b/tensorflow/contrib/lite/experimental/c/c_api.h index b429e76870..44b936aa87 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api.h +++ b/tensorflow/contrib/lite/experimental/c/c_api.h @@ -52,8 +52,9 @@ limitations under the License. extern "C" { #endif // __cplusplus -typedef TfLiteTensor TFL_Tensor; +typedef TfLiteRegistration TFL_Registration; typedef TfLiteStatus TFL_Status; +typedef TfLiteTensor TFL_Tensor; typedef TfLiteType TFL_Type; // -------------------------------------------------------------------------- diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc index c4dbc55cbf..0f16595811 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc @@ -26,6 +26,22 @@ TFL_Status TFL_InterpreterResetVariableTensorsToZero( return interpreter->impl->ResetVariableTensorsToZero(); } +void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options, + TFL_BuiltinOperator op, + const TFL_Registration* registration, + int32_t min_version, + int32_t max_version) { + options->op_resolver.AddBuiltin(static_cast(op), + registration, min_version, max_version); +} + +void TFL_InterpreterOptionsAddCustomOp(TFL_InterpreterOptions* options, + const char* name, + const TFL_Registration* registration, + int min_version, int max_version) { + options->op_resolver.AddCustom(name, registration, min_version, max_version); +} + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h 
b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h index b0ac258dcf..b8de7b9964 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h @@ -15,16 +15,41 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_EXPERIMENTAL_H_ #define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_EXPERIMENTAL_H_ +#include "tensorflow/contrib/lite/builtin_ops.h" #include "tensorflow/contrib/lite/experimental/c/c_api.h" #ifdef __cplusplus extern "C" { #endif // __cplusplus +typedef TfLiteBuiltinOperator TFL_BuiltinOperator; + // Resets all variable tensors to zero. TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensorsToZero( TFL_Interpreter* interpreter); +// Adds an op registration for a builtin operator. +// +// NOTE: The interpreter will make a copy of `registration` internally, so the +// caller should ensure that its contents (function pointers, etc...) remain +// valid for the duration of the interpreter's lifetime. A common practice is +// making the provided TFL_Registration instance static. +void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options, + TFL_BuiltinOperator op, + const TFL_Registration* registration, + int min_version, int max_version); + +// Adds an op registration for a custom operator. +// +// NOTE: The interpreter will make a copy of `registration` internally, so the +// caller should ensure that its contents (function pointers, etc...) remain +// valid for the duration of the interpreter's lifetime. A common practice is +// making the provided TFL_Registration instance static. 
+void TFL_InterpreterOptionsAddCustomOp(TFL_InterpreterOptions* options, + const char* name, + const TFL_Registration* registration, + int min_version, int max_version); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc index db6e5251de..d86ad00d6d 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc @@ -16,25 +16,40 @@ limitations under the License. #include "tensorflow/contrib/lite/experimental/c/c_api_experimental.h" #include <gtest/gtest.h> +#include "tensorflow/contrib/lite/builtin_ops.h" #include "tensorflow/contrib/lite/experimental/c/c_api.h" #include "tensorflow/contrib/lite/testing/util.h" namespace { +TfLiteRegistration* GetDummyRegistration() { + static TfLiteRegistration registration = { + .init = nullptr, + .free = nullptr, + .prepare = nullptr, + .invoke = [](TfLiteContext*, TfLiteNode*) { return kTfLiteOk; }, + }; + return &registration; +} + TEST(CApiExperimentalSimple, Smoke) { TFL_Model* model = TFL_NewModelFromFile( "tensorflow/contrib/lite/testdata/add.bin"); ASSERT_NE(model, nullptr); - TFL_Interpreter* interpreter = - TFL_NewInterpreter(model, /*optional_options=*/nullptr); + TFL_InterpreterOptions* options = TFL_NewInterpreterOptions(); + TFL_InterpreterOptionsAddBuiltinOp(options, kTfLiteBuiltinAdd, + GetDummyRegistration(), 1, 1); + + TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options); ASSERT_NE(interpreter, nullptr); ASSERT_EQ(TFL_InterpreterAllocateTensors(interpreter), kTfLiteOk); - EXPECT_EQ(TFL_InterpreterResetVariableTensorsToZero(interpreter), kTfLiteOk); + EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk); - TFL_DeleteModel(model); TFL_DeleteInterpreter(interpreter); + TFL_DeleteInterpreterOptions(options); + TFL_DeleteModel(model); } } // namespace diff --git 
a/tensorflow/contrib/lite/experimental/c/c_api_internal.h b/tensorflow/contrib/lite/experimental/c/c_api_internal.h index 60c2e4e2cd..af675ac98a 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_internal.h +++ b/tensorflow/contrib/lite/experimental/c/c_api_internal.h @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/op_resolver.h" // Internal structures used by the C API. These are likely to change and should // not be depended on. @@ -33,6 +34,7 @@ struct TFL_InterpreterOptions { kDefaultNumThreads = -1, }; int num_threads = kDefaultNumThreads; + tflite::MutableOpResolver op_resolver; }; struct TFL_Interpreter { diff --git a/tensorflow/contrib/lite/mutable_op_resolver.cc b/tensorflow/contrib/lite/mutable_op_resolver.cc index d7c0181720..a36404399b 100644 --- a/tensorflow/contrib/lite/mutable_op_resolver.cc +++ b/tensorflow/contrib/lite/mutable_op_resolver.cc @@ -30,7 +30,7 @@ const TfLiteRegistration* MutableOpResolver::FindOp(const char* op, } void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op, - TfLiteRegistration* registration, + const TfLiteRegistration* registration, int min_version, int max_version) { for (int version = min_version; version <= max_version; ++version) { TfLiteRegistration new_registration = *registration; @@ -43,7 +43,7 @@ void MutableOpResolver::AddBuiltin(tflite::BuiltinOperator op, } void MutableOpResolver::AddCustom(const char* name, - TfLiteRegistration* registration, + const TfLiteRegistration* registration, int min_version, int max_version) { for (int version = min_version; version <= max_version; ++version) { TfLiteRegistration new_registration = *registration; @@ -55,4 +55,15 @@ void MutableOpResolver::AddCustom(const char* name, } } +void MutableOpResolver::AddAll(const MutableOpResolver& other) { + // map::insert does not replace existing elements, and map::insert_or_assign + // wasn't 
added until C++17. + for (const auto& other_builtin : other.builtins_) { + builtins_[other_builtin.first] = other_builtin.second; + } + for (const auto& other_custom_op : other.custom_ops_) { + custom_ops_[other_custom_op.first] = other_custom_op.second; + } +} + } // namespace tflite diff --git a/tensorflow/contrib/lite/mutable_op_resolver.h b/tensorflow/contrib/lite/mutable_op_resolver.h index c319041e9b..efd6cfac2a 100644 --- a/tensorflow/contrib/lite/mutable_op_resolver.h +++ b/tensorflow/contrib/lite/mutable_op_resolver.h @@ -57,10 +57,12 @@ class MutableOpResolver : public OpResolver { const TfLiteRegistration* FindOp(tflite::BuiltinOperator op, int version) const override; const TfLiteRegistration* FindOp(const char* op, int version) const override; - void AddBuiltin(tflite::BuiltinOperator op, TfLiteRegistration* registration, - int min_version = 1, int max_version = 1); - void AddCustom(const char* name, TfLiteRegistration* registration, + void AddBuiltin(tflite::BuiltinOperator op, + const TfLiteRegistration* registration, int min_version = 1, + int max_version = 1); + void AddCustom(const char* name, const TfLiteRegistration* registration, int min_version = 1, int max_version = 1); + void AddAll(const MutableOpResolver& other); private: typedef std::pair<tflite::BuiltinOperator, int> BuiltinOperatorKey; diff --git a/tensorflow/contrib/lite/mutable_op_resolver_test.cc b/tensorflow/contrib/lite/mutable_op_resolver_test.cc index db690eaab9..b70c703839 100644 --- a/tensorflow/contrib/lite/mutable_op_resolver_test.cc +++ b/tensorflow/contrib/lite/mutable_op_resolver_test.cc @@ -36,6 +36,20 @@ TfLiteRegistration* GetDummyRegistration() { return &registration; } +TfLiteStatus Dummy2Invoke(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteRegistration* GetDummy2Registration() { + static TfLiteRegistration registration = { + .init = nullptr, + .free = nullptr, + .prepare = nullptr, + .invoke = Dummy2Invoke, + }; + return &registration; +} + TEST(MutableOpResolverTest, 
FinOp) { MutableOpResolver resolver; resolver.AddBuiltin(BuiltinOperator_ADD, GetDummyRegistration()); @@ -119,6 +133,26 @@ TEST(MutableOpResolverTest, FindCustomOpWithUnsupportedVersion) { EXPECT_EQ(found_registration, nullptr); } +TEST(MutableOpResolverTest, AddAll) { + MutableOpResolver resolver1; + resolver1.AddBuiltin(BuiltinOperator_ADD, GetDummyRegistration()); + resolver1.AddBuiltin(BuiltinOperator_MUL, GetDummy2Registration()); + + MutableOpResolver resolver2; + resolver2.AddBuiltin(BuiltinOperator_SUB, GetDummyRegistration()); + resolver2.AddBuiltin(BuiltinOperator_ADD, GetDummy2Registration()); + + // resolver2's ADD op should replace resolver1's ADD op, while augmenting + // non-overlapping ops. + resolver1.AddAll(resolver2); + ASSERT_EQ(resolver1.FindOp(BuiltinOperator_ADD, 1)->invoke, + GetDummy2Registration()->invoke); + ASSERT_EQ(resolver1.FindOp(BuiltinOperator_MUL, 1)->invoke, + GetDummy2Registration()->invoke); + ASSERT_EQ(resolver1.FindOp(BuiltinOperator_SUB, 1)->invoke, + GetDummyRegistration()->invoke); +} + } // namespace } // namespace tflite -- GitLab From 7a67406abda84cb5c2da02ed4d77a85ddfd2a417 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Fri, 14 Sep 2018 12:38:07 -0700 Subject: [PATCH 0297/1357] Update 1.11.0-rc0 version strings to 1.11.0-rc1 (#22284) --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1f71e24eeb..b043a69431 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
"-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 8442e58f20..d40ffb8cd0 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.11.0-rc0' +_VERSION = '1.11.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 0cdf60ff8239a68326af9610e715f42c773be731 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 16:41:38 -0700 Subject: [PATCH 0298/1357] Make HLO liveness analysis correctly handle computations with side effect instructions. PiperOrigin-RevId: 213361904 --- .../xla/service/hlo_liveness_analysis.cc | 35 +++++++- .../xla/service/hlo_liveness_analysis_test.cc | 84 +++++++++++++++++++ 2 files changed, 115 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc index 3a1dd471c6..5bf055f3c0 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis.cc @@ -219,6 +219,33 @@ void PropagateLivenessToParameterCallers( } } +// Makes sure that if a live instruction is within a computation used in control +// flow operations, we mark live even other related instructions. 
+void PropagateLivenessThroughControlFlow( + const HloInstruction* instruction, + HloLivenessAnalysis::HloIndexMap* live_index_map, Worklist* worklist, + Workset* workset, CallGraph* call_graph) { + const CallGraphNode& call_graph_node = + call_graph->GetNode(instruction->parent()); + if (call_graph_node.context() == CallContext::kSequential) { + for (const CallSite& callsite : call_graph_node.caller_callsites()) { + HloInstruction* caller = callsite.instruction(); + if (caller->opcode() == HloOpcode::kWhile) { + // If a live instruction is within the %while body or condition + // computation, mark the predicate value returned by the condition + // computation live as well. + MarkLiveAtIndex(caller->while_condition()->root_instruction(), {}, + live_index_map, worklist, workset); + } else if (caller->opcode() == HloOpcode::kConditional) { + // If a live instruction is within the true or false branches of a + // conditional, we mark the predicate operand live as well. + MarkLiveAtIndex(caller->operand(0), {}, live_index_map, worklist, + workset); + } + } + } +} + } // namespace HloLivenessAnalysis::HloLivenessAnalysis(const HloModule& module) @@ -257,12 +284,10 @@ void HloLivenessAnalysis::RunAnalysis() { } else if (instruction->opcode() == HloOpcode::kGetTupleElement) { PropagateLivenessThroughGTE(instruction, &live_index_map_, &worklist, &workset); - } else if (instruction->opcode() == HloOpcode::kWhile && - ShapeUtil::IsTuple(instruction->shape())) { + } else if (instruction->opcode() == HloOpcode::kWhile) { PropagateLivenessThroughWhile(instruction, &live_index_map_, &worklist, &workset); - } else if (instruction->opcode() == HloOpcode::kParameter && - ShapeUtil::IsTuple(instruction->shape())) { + } else if (instruction->opcode() == HloOpcode::kParameter) { PropagateLivenessToParameterCallers(instruction, &live_index_map_, &worklist, &workset, call_graph_.get()); @@ -277,6 +302,8 @@ void HloLivenessAnalysis::RunAnalysis() { MarkLiveAtAllIndices(operand, 
&live_index_map_, &worklist, &workset); } } + PropagateLivenessThroughControlFlow(instruction, &live_index_map_, + &worklist, &workset, call_graph_.get()); } } diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc index 01b625c29c..e0ae1173c6 100644 --- a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc @@ -398,5 +398,89 @@ TEST_F(HloLivenessAnalysisTest, WhileWithLiveTupleElements) { EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "loop_var.1"), {2})); } +TEST_F(HloLivenessAnalysisTest, WhileWithOutfeed) { + auto module = ParseHloString(R"( + HloModule OutfeedLoop + WhileBody { + body_param = (s32[]) parameter(0) + token = token[] after-all() + constant.2 = s32[] constant(2) + outfeed_tuple = (s32[]) outfeed(constant.2, token) + get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0 + constant.1 = s32[] constant(1) + add = s32[] add(get-tuple-element.1, constant.1) + ROOT tuple = (s32[]) tuple(add) + } + WhileCondition { + cond_param = (s32[]) parameter(0) + get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0 + constant.2 = s32[] constant(10) + ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2) + } + ENTRY SimpleLoop { + constant.3 = s32[] constant(0) + tuple.1 = (s32[]) tuple(constant.3) + while = (s32[]) while(tuple.1), condition=WhileCondition, + body=WhileBody + ROOT rtuple = () tuple() + })") + .ValueOrDie(); + + const HloLivenessAnalysis& liveness = RunLiveness(module.get()); + EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "add"), {})); + EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "constant.3"), {})); +} + +TEST_F(HloLivenessAnalysisTest, NestedWhileWithOutfeed) { + auto module = ParseHloString(R"( + HloModule OutfeedLoop + InnerWhileBody { + body_param = (s32[]) parameter(0) + token = token[] after-all() + constant.2 = 
s32[] constant(2) + outfeed_tuple = (s32[]) outfeed(constant.2, token) + get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0 + constant.1 = s32[] constant(1) + add = s32[] add(get-tuple-element.1, constant.1) + ROOT tuple = (s32[]) tuple(add) + } + InnerWhileCondition { + cond_param = (s32[]) parameter(0) + get-tuple-element.3 = s32[] get-tuple-element(cond_param), index=0 + constant.2 = s32[] constant(10) + ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2) + } + OuterWhileCondition { + cond_param.2 = (s32[]) parameter(0) + get-tuple-element.5 = s32[] get-tuple-element(cond_param.2), index=0 + constant.5 = s32[] constant(5) + ROOT less-than.2 = pred[] less-than(get-tuple-element.5, constant.5) + } + OuterWhileBody { + body_param.2 = (s32[]) parameter(0) + get-tuple-element.8 = s32[] get-tuple-element(body_param.2), index=0 + constant.6 = s32[] constant(0) + tuple.2 = (s32[]) tuple(constant.6) + inner_while = (s32[]) while(tuple.2), condition=InnerWhileCondition, + body=InnerWhileBody + constant.7 = s32[] constant(1) + add.2 = s32[] add(get-tuple-element.8, constant.7) + ROOT rtuple = (s32[]) tuple(add.2) + } + ENTRY SimpleLoop { + constant.3 = s32[] constant(0) + tuple.1 = (s32[]) tuple(constant.3) + while = (s32[]) while(tuple.1), condition=OuterWhileCondition, + body=OuterWhileBody + ROOT rtuple = () tuple() + })") + .ValueOrDie(); + + const HloLivenessAnalysis& liveness = RunLiveness(module.get()); + EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "add"), {})); + EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "add.2"), {})); + EXPECT_TRUE(liveness.IsLive(GetInstruction(module.get(), "constant.3"), {})); +} + } // namespace } // namespace xla -- GitLab From 6805a8b27759a530f0ebab0670593a05455a64a0 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 17 Sep 2018 16:41:56 -0700 Subject: [PATCH 0299/1357] Changing `OpInputList` so that it is a forward iterator and taking advantage of the fact in the tf.data kernels. 
PiperOrigin-RevId: 213361953 --- tensorflow/core/framework/op_kernel.h | 31 ++++++++--- .../core/kernels/data/captured_function.cc | 29 ++++------- .../core/kernels/data/captured_function.h | 22 +++----- .../core/kernels/data/filter_dataset_op.cc | 13 ++--- .../core/kernels/data/flat_map_dataset_op.cc | 13 +---- .../core/kernels/data/generator_dataset_op.cc | 44 ++++------------ .../data/group_by_window_dataset_op.cc | 51 ++++--------------- .../kernels/data/interleave_dataset_op.cc | 12 +---- .../kernels/data/map_and_batch_dataset_op.cc | 12 +---- .../core/kernels/data/map_dataset_op.cc | 14 ++--- tensorflow/core/kernels/data/optional_ops.cc | 7 +-- .../data/parallel_interleave_dataset_op.cc | 25 ++------- .../kernels/data/parallel_map_dataset_op.cc | 14 ++--- .../kernels/data/parse_example_dataset_op.cc | 7 +-- .../core/kernels/data/scan_dataset_op.cc | 19 ++----- .../core/kernels/data/tensor_dataset_op.cc | 6 +-- 16 files changed, 88 insertions(+), 231 deletions(-) diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index e752599de1..4bbd6c3d7d 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -372,18 +372,37 @@ class OpKernelConstruction { template class OpArgIterator { public: - typedef OpArgIterator ME; + using iterator_category = std::forward_iterator_tag; + using value_type = ElementType; + using pointer = ElementType*; + using reference = ElementType&; + using difference_type = ptrdiff_t; + OpArgIterator(const ListType* list, int i) : list_(list), i_(i) {} - bool operator==(const ME& rhs) { + + bool operator==(const OpArgIterator& rhs) { DCHECK(list_ == rhs.list_); return i_ == rhs.i_; } - bool operator!=(const ME& rhs) { + + bool operator!=(const OpArgIterator& rhs) { DCHECK(list_ == rhs.list_); return i_ != rhs.i_; } - void operator++() { ++i_; } - ElementType& operator*() { return (*list_)[i_]; } + + OpArgIterator operator++() { // prefix ++it + ++i_; + return *this; 
+ } + + OpArgIterator operator++(int) { // postfix it++ + OpArgIterator old_value = *this; + ++i_; + return old_value; + } + + reference operator*() { return (*list_)[i_]; } + pointer operator->() { return &(*list_)[i_]; } private: const ListType* const list_; @@ -394,7 +413,7 @@ class OpArgIterator { // that are passed to the op as a single named argument. class OpInputList { public: - typedef OpArgIterator<OpInputList, const Tensor&> Iterator; + typedef OpArgIterator<OpInputList, const Tensor> Iterator; OpInputList() : ctx_(nullptr), start_(0), stop_(0) {} OpInputList(OpKernelContext* ctx, int start, int stop) : ctx_(ctx), start_(start), stop_(stop) {} diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index 31c8f5c0ea..b3ab7e2bc6 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -22,41 +22,30 @@ limitations under the License. #include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { namespace data { /* static */ Status CapturedFunction::Create( - const NameAttrList& func, std::vector<Tensor> captured_inputs, + const NameAttrList& func, OpKernelContext* ctx, const string& argument, std::unique_ptr<CapturedFunction>* out_function) { - return Create(func, std::move(captured_inputs), true, out_function); + return CapturedFunction::Create(func, ctx, argument, true, out_function); } -/* static */ Status CapturedFunction::Create( - const NameAttrList& func, std::vector<Tensor> captured_inputs, + const NameAttrList& func, OpKernelContext* ctx, const string& argument, bool use_inter_op_parallelism, std::unique_ptr<CapturedFunction>* out_function) { - out_function->reset(new CapturedFunction(func, std::move(captured_inputs), - use_inter_op_parallelism)); + OpInputList inputs; + TF_RETURN_IF_ERROR(ctx->input_list(argument, &inputs)); + std::vector<Tensor> arguments(inputs.begin(), inputs.end()); + 
*out_function = WrapUnique(new CapturedFunction(func, std::move(arguments), + use_inter_op_parallelism)); return Status::OK(); } -/* static */ -Status CapturedFunction::Create( - const NameAttrList& func, OpKernelContext* ctx, const string& argument, - std::unique_ptr* out_function) { - OpInputList argument_inputs; - TF_RETURN_IF_ERROR(ctx->input_list(argument, &argument_inputs)); - std::vector arguments_t; - arguments_t.reserve(argument_inputs.size()); - for (const Tensor& t : argument_inputs) { - arguments_t.push_back(t); - } - return CapturedFunction::Create(func, std::move(arguments_t), out_function); -} - CapturedFunction::~CapturedFunction() { if (lib_ != nullptr && f_handle_ != kInvalidHandle) { lib_->ReleaseHandle(f_handle_).IgnoreError(); diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h index 8b420fa5db..a10376bf97 100644 --- a/tensorflow/core/kernels/data/captured_function.h +++ b/tensorflow/core/kernels/data/captured_function.h @@ -42,27 +42,19 @@ namespace data { // context. class CapturedFunction { public: - // Creates a new instance from a list of named attributes and captured inputs. - // - // NOTE(mrry): The `captured_inputs` are passed by value. For - // efficiency, you are recommended to move this argument into the call. - static Status Create(const NameAttrList& func, - std::vector captured_inputs, + // Creates a new instance using a list of named attributes, fetching captured + // inputs from a context argument. + static Status Create(const NameAttrList& func, OpKernelContext* ctx, + const string& argument, std::unique_ptr* out_function); - // Creates a new instance from a list of named attributes and captured inputs. + // Creates a new instance using a list of named attributes, fetching captured + // inputs from a context argument. // // If `use_inter_op_parallelism` is false, the runtime may use an executor // that is optimized for small functions. 
- static Status Create(const NameAttrList& func, - std::vector captured_inputs, - bool use_inter_op_parallelism, - std::unique_ptr* out_function); - - // Creates a new instance using a list of named attributes, fetching captured - // inputs from a context argument. static Status Create(const NameAttrList& func, OpKernelContext* ctx, - const string& argument, + const string& argument, bool use_inter_op_parallelism, std::unique_ptr* out_function); ~CapturedFunction(); diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc index bf0aecaf3c..19c35f94a6 100644 --- a/tensorflow/core/kernels/data/filter_dataset_op.cc +++ b/tensorflow/core/kernels/data/filter_dataset_op.cc @@ -37,14 +37,6 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - FunctionLibraryRuntime::Handle pred_handle; OP_REQUIRES_OK(ctx, ctx->function_library()->Instantiate( @@ -61,9 +53,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { Node* ret_node = pred_body->ret_nodes[0]; Node* ret_input_node; OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node)); + std::unique_ptr captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), &captured_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + &captured_func)); if (ret_input_node->def().op() == "_Arg") { int32 index = -1; diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc index e3c45ef86c..2fada22a21 100644 --- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc +++ 
b/tensorflow/core/kernels/data/flat_map_dataset_op.cc @@ -39,18 +39,9 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - std::unique_ptr captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), &captured_func)); - + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + &captured_func)); *output = new Dataset(ctx, input, func_, std::move(captured_func), output_types_, output_shapes_); } diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc index ac5cc1b2c1..71a36314a0 100644 --- a/tensorflow/core/kernels/data/generator_dataset_op.cc +++ b/tensorflow/core/kernels/data/generator_dataset_op.cc @@ -145,44 +145,18 @@ GeneratorDatasetOp::GeneratorDatasetOp(OpKernelConstruction* ctx) void GeneratorDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase** output) { - OpInputList init_func_other_args_input; - OP_REQUIRES_OK(ctx, ctx->input_list("init_func_other_args", - &init_func_other_args_input)); - std::vector init_func_other_args; - init_func_other_args.reserve(init_func_other_args_input.size()); - for (const Tensor& t : init_func_other_args_input) { - init_func_other_args.push_back(t); - } std::unique_ptr init_func; - OP_REQUIRES_OK( - ctx, CapturedFunction::Create(init_func_, std::move(init_func_other_args), - &init_func)); - - OpInputList next_func_other_args_input; - OP_REQUIRES_OK(ctx, ctx->input_list("next_func_other_args", - &next_func_other_args_input)); - std::vector next_func_other_args; - next_func_other_args.reserve(next_func_other_args_input.size()); - for (const Tensor& t : 
next_func_other_args_input) { - next_func_other_args.push_back(t); - } + OP_REQUIRES_OK(ctx, CapturedFunction::Create( + init_func_, ctx, "init_func_other_args", &init_func)); + std::unique_ptr next_func; - OP_REQUIRES_OK( - ctx, CapturedFunction::Create(next_func_, std::move(next_func_other_args), - &next_func)); - - OpInputList finalize_func_other_args_input; - OP_REQUIRES_OK(ctx, ctx->input_list("finalize_func_other_args", - &finalize_func_other_args_input)); - std::vector finalize_func_other_args; - finalize_func_other_args.reserve(finalize_func_other_args_input.size()); - for (const Tensor& t : finalize_func_other_args_input) { - finalize_func_other_args.push_back(t); - } - std::unique_ptr finalize_func; OP_REQUIRES_OK(ctx, CapturedFunction::Create( - finalize_func_, std::move(finalize_func_other_args), - &finalize_func)); + next_func_, ctx, "next_func_other_args", &next_func)); + + std::unique_ptr finalize_func; + OP_REQUIRES_OK(ctx, CapturedFunction::Create(finalize_func_, ctx, + "finalize_func_other_args", + &finalize_func)); *output = new Dataset(ctx, std::move(init_func), std::move(next_func), diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc index e4fa557598..8b417bb1c2 100644 --- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc @@ -42,50 +42,19 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - // Get captured inputs for the key, reduce, and window_size functions. 
- OpInputList key_func_other_argument_inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("key_func_other_arguments", - &key_func_other_argument_inputs)); - std::vector key_func_other_arguments; - key_func_other_arguments.reserve(key_func_other_argument_inputs.size()); - for (const Tensor& t : key_func_other_argument_inputs) { - key_func_other_arguments.push_back(t); - } - OpInputList reduce_func_other_argument_inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("reduce_func_other_arguments", - &reduce_func_other_argument_inputs)); - std::vector reduce_func_other_arguments; - reduce_func_other_arguments.reserve( - reduce_func_other_argument_inputs.size()); - for (const Tensor& t : reduce_func_other_argument_inputs) { - reduce_func_other_arguments.push_back(t); - } - OpInputList window_size_func_other_argument_inputs; - OP_REQUIRES_OK(ctx, - ctx->input_list("window_size_func_other_arguments", - &window_size_func_other_argument_inputs)); - std::vector window_size_func_other_arguments; - window_size_func_other_arguments.reserve( - window_size_func_other_argument_inputs.size()); - for (const Tensor& t : window_size_func_other_argument_inputs) { - window_size_func_other_arguments.push_back(t); - } - // TODO(mrry): Refactor CapturedFunction to share the runtime - // state between multiple functions? 
std::unique_ptr captured_key_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - key_func_, std::move(key_func_other_arguments), - &captured_key_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(key_func_, ctx, + "key_func_other_arguments", + &captured_key_func)); std::unique_ptr captured_reduce_func; - OP_REQUIRES_OK( - ctx, CapturedFunction::Create(reduce_func_, - std::move(reduce_func_other_arguments), - &captured_reduce_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(reduce_func_, ctx, + "reduce_func_other_arguments", + &captured_reduce_func)); std::unique_ptr captured_window_size_func; - OP_REQUIRES_OK( - ctx, CapturedFunction::Create( - window_size_func_, std::move(window_size_func_other_arguments), - &captured_window_size_func)); + OP_REQUIRES_OK(ctx, + CapturedFunction::Create(window_size_func_, ctx, + "window_size_func_other_arguments", + &captured_window_size_func)); *output = new Dataset( ctx, input, key_func_, reduce_func_, window_size_func_, diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc index 0768f46665..0aa802b874 100644 --- a/tensorflow/core/kernels/data/interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc @@ -39,14 +39,6 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - const Tensor* cycle_length_t; OP_REQUIRES_OK(ctx, ctx->input("cycle_length", &cycle_length_t)); OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(cycle_length_t->shape()), @@ -66,8 +58,8 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { errors::InvalidArgument("block_length must be greater than zero.")); std::unique_ptr 
captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), &captured_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + &captured_func)); *output = new Dataset(ctx, input, func_, std::move(captured_func), cycle_length, diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 80efac5d4b..83896219a3 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -49,14 +49,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { protected: void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - int64 batch_size; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "batch_size", &batch_size)); OP_REQUIRES( @@ -93,8 +85,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { ParseScalarArgument(ctx, "drop_remainder", &drop_remainder)); std::unique_ptr captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), &captured_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + &captured_func)); *output = new Dataset(ctx, input, batch_size, num_parallel_calls, drop_remainder, output_types_, output_shapes_, func_, diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index af301e2b42..f112e1dc43 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -38,18 +38,10 @@ class MapDatasetOp : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** 
output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - std::unique_ptr captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), - use_inter_op_parallelism_, &captured_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + use_inter_op_parallelism_, + &captured_func)); *output = new Dataset(ctx, input, func_, std::move(captured_func), output_types_, output_shapes_); diff --git a/tensorflow/core/kernels/data/optional_ops.cc b/tensorflow/core/kernels/data/optional_ops.cc index 6180df5af2..346e4ceebd 100644 --- a/tensorflow/core/kernels/data/optional_ops.cc +++ b/tensorflow/core/kernels/data/optional_ops.cc @@ -108,11 +108,8 @@ class OptionalFromValueOp : public OpKernel { void Compute(OpKernelContext* ctx) override { OpInputList components_input; OP_REQUIRES_OK(ctx, ctx->input_list("components", &components_input)); - std::vector components; - components.reserve(components_input.size()); - for (const Tensor& component_t : components_input) { - components.push_back(component_t); - } + std::vector components(components_input.begin(), + components_input.end()); OP_REQUIRES_OK( ctx, WriteOptionalWithValueToOutput(ctx, 0, std::move(components))); } diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 2f2db09508..9cd46bf5dd 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -44,14 +44,6 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, 
ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - int64 cycle_length = 0; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cycle_length", &cycle_length)); @@ -83,8 +75,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr captured_func; OP_REQUIRES_OK( - ctx, CapturedFunction::Create( - interleave_func_, std::move(other_arguments), &captured_func)); + ctx, CapturedFunction::Create(interleave_func_, ctx, "other_arguments", + &captured_func)); *output = new Dataset(ctx, input, interleave_func_, std::move(captured_func), @@ -1102,9 +1094,6 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - int64 cycle_length = 0; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cycle_length", &cycle_length)); @@ -1128,16 +1117,10 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { errors::InvalidArgument( "num_parallel_calls must less than or equal to cycle_length.")); - // TODO(b/114267189): Use `other_arguments(inputs.begin(), inputs.end());`. 
- std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } std::unique_ptr captured_func; OP_REQUIRES_OK( - ctx, CapturedFunction::Create( - interleave_func_, std::move(other_arguments), &captured_func)); + ctx, CapturedFunction::Create(interleave_func_, ctx, "other_arguments", + &captured_func)); *output = new Dataset(ctx, input, interleave_func_, std::move(captured_func), cycle_length, block_length, diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index b584316d69..6abe6c8338 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -44,14 +44,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { protected: void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } - int32 num_parallel_calls; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls", &num_parallel_calls)); @@ -60,9 +52,9 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { "num_parallel_calls must be greater than zero.")); std::unique_ptr captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), - use_inter_op_parallelism_, &captured_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + use_inter_op_parallelism_, + &captured_func)); *output = new Dataset(ctx, input, func_, num_parallel_calls, output_types_, output_shapes_, use_inter_op_parallelism_, diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc index 
0cf5db017b..c28c06da62 100644 --- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc +++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc @@ -87,11 +87,8 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { "Expected len(dense_defaults) == len(dense_keys) but got: ", dense_default_tensors.size(), " vs. ", dense_keys_.size())); - std::vector dense_defaults; - dense_defaults.reserve(dense_default_tensors.size()); - for (const Tensor& dense_default_t : dense_default_tensors) { - dense_defaults.push_back(dense_default_t); - } + std::vector dense_defaults(dense_default_tensors.begin(), + dense_default_tensors.end()); for (int d = 0; d < dense_keys_.size(); ++d) { const Tensor& def_value = dense_defaults[d]; diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc index 6e515d6cc8..dbe31f37b8 100644 --- a/tensorflow/core/kernels/data/scan_dataset_op.cc +++ b/tensorflow/core/kernels/data/scan_dataset_op.cc @@ -45,23 +45,12 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { OpInputList initial_state_inputs; OP_REQUIRES_OK(ctx, ctx->input_list("initial_state", &initial_state_inputs)); - std::vector initial_state; - initial_state.reserve(initial_state_inputs.size()); - for (const Tensor& t : initial_state_inputs) { - initial_state.push_back(t); - } - - OpInputList inputs; - OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); - std::vector other_arguments; - other_arguments.reserve(inputs.size()); - for (const Tensor& t : inputs) { - other_arguments.push_back(t); - } + std::vector initial_state(initial_state_inputs.begin(), + initial_state_inputs.end()); std::unique_ptr captured_func; - OP_REQUIRES_OK(ctx, CapturedFunction::Create( - func_, std::move(other_arguments), &captured_func)); + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + &captured_func)); *output = new Dataset(ctx, input, func_, std::move(initial_state), std::move(captured_func), 
state_types_, output_types_, diff --git a/tensorflow/core/kernels/data/tensor_dataset_op.cc b/tensorflow/core/kernels/data/tensor_dataset_op.cc index e1cefd23d8..ca4ea25b89 100644 --- a/tensorflow/core/kernels/data/tensor_dataset_op.cc +++ b/tensorflow/core/kernels/data/tensor_dataset_op.cc @@ -33,11 +33,7 @@ class TensorDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, ctx->input_list("components", &inputs)); // TODO(mrry): Validate that the shapes of the "components" tensors match // the "shapes" attr.; - std::vector components; - components.reserve(inputs.size()); - for (const Tensor& t : inputs) { - components.push_back(t); - } + std::vector components(inputs.begin(), inputs.end()); *output = new Dataset(ctx, std::move(components)); } -- GitLab From 6e8293f1cdf2efe3cec2efdcfa89174893b0bace Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 16:51:19 -0700 Subject: [PATCH 0300/1357] Increase test timeout for dnn_tree_combined_estimator_test to de-flake. PiperOrigin-RevId: 213363558 --- tensorflow/contrib/boosted_trees/estimator_batch/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index 5fcb19a47a..14b6fc4ac2 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -173,6 +173,7 @@ py_library( py_test( name = "dnn_tree_combined_estimator_test", size = "medium", + timeout = "long", srcs = ["dnn_tree_combined_estimator_test.py"], srcs_version = "PY2AND3", tags = [ -- GitLab From 928389d4d61f0cb5932672aeeafadb1c18514dd3 Mon Sep 17 00:00:00 2001 From: Eddie Zhou Date: Mon, 17 Sep 2018 17:06:11 -0700 Subject: [PATCH 0301/1357] Fixed bug where a mixture of Variable and PartitionedVariable would break SDCA. Added new test that fails with `IndexError: list index out of range` in `_get_partitioned_update_ops` without the corresponding fix. 
Note that the effect of this bug is minimal, because for Estimator users, it only applies to sparse features that are not partitionable (e.g. [1,]), since all variables are created with the same partitioner in Estimator). PiperOrigin-RevId: 213365956 --- .../python/kernel_tests/sdca_ops_test.py | 62 +++++++++++++++++++ .../linear_optimizer/python/ops/sdca_ops.py | 26 ++++---- 2 files changed, 76 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 7a1914d41f..9ecf023e03 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -323,6 +323,68 @@ class SdcaWithLogisticLossTest(SdcaModelTest): self.assertAllClose( 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + def testSomePartitionedPrimals(self): + # Setup test data + example_protos = [ + make_example_proto({ + 'age': [0], + 'gender': [0] + }, 0), + make_example_proto({ + 'age': [0], + 'gender': [1] + }, 1), + ] + example_weights = [1.0, 1.0] + for num_shards in _SHARD_NUMBERS: + with self._single_threaded_test_session(): + examples = make_example_dict(example_protos, example_weights) + # Explicitly make age a [1]-shaped Variable (which cannot be + # partitioned), while making gender a PartitionedVariable. 
+ age_weights = variables_lib.Variable( + array_ops.zeros([1], dtype=dtypes.float32)) + with variable_scope.variable_scope( + name_or_scope=('variables/shard_{}'.format(num_shards) + if num_shards else 'variables'), + partitioner=partitioned_variables.fixed_size_partitioner( + num_shards=2, axis=0)): + gender_weights = variable_scope.get_variable( + name='gender', + initializer=array_ops.zeros([2], dtype=dtypes.float32)) + variables = dict( + sparse_features_weights=[age_weights, gender_weights], + dense_features_weights=[]) + options = dict( + symmetric_l2_regularization=1, + symmetric_l1_regularization=0, + num_table_shards=num_shards, + loss_type='logistic_loss') + + lr = SdcaModel(examples, variables, options) + variables_lib.global_variables_initializer().run() + unregularized_loss = lr.unregularized_loss(examples) + loss = lr.regularized_loss(examples) + predictions = lr.predictions(examples) + self.assertAllClose(0.693147, unregularized_loss.eval()) + self.assertAllClose(0.693147, loss.eval()) + train_op = lr.minimize() + for _ in range(_MAX_ITERATIONS): + train_op.run() + lr.update_weights(train_op).run() + # The high tolerance in unregularized_loss comparisons is due to the + # fact that it's possible to trade off unregularized_loss vs. + # regularization and still have a sum that is quite close to the + # optimal regularized_loss value. SDCA's duality gap only ensures that + # the regularized_loss is within 0.01 of optimal. + # 0.525457 is the optimal regularized_loss. + # 0.593014 is the unregularized_loss at that optimum. 
+ self.assertAllClose(0.512591, unregularized_loss.eval(), atol=0.05) + self.assertAllClose(0.593014, loss.eval(), atol=0.01) + predicted_labels = get_binary_predictions_for_logistic(predictions) + self.assertAllEqual([0, 1], predicted_labels.eval()) + self.assertAllClose( + 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + def testSparseRandom(self): dim = 20 num_examples = 1000 diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 14f59a3f64..b98adf862b 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -400,14 +400,16 @@ class SdcaModel(object): sparse_weights = [] sparse_indices = [] - # If we have partitioned variables, keep a few lists of Tensors around - # that we need for the assign_add after the op call to - # gen_sdca_ops.sdca_optimizer(). - num_partitions_by_var = [] - p_assignments_by_var = [] - gather_ids_by_var = [] - for w, i in zip(self._slots['unshrinked_sparse_features_weights'], - sparse_feature_indices): + # If we have partitioned variables, keep a few dictionaries of Tensors + # around that we need for the assign_add after the op call to + # gen_sdca_ops.sdca_optimizer(). These are keyed because we may have a + # mix of partitioned and un-partitioned variables. + num_partitions_by_var = {} + p_assignments_by_var = {} + gather_ids_by_var = {} + for v_num, (w, i) in enumerate( + zip(self._slots['unshrinked_sparse_features_weights'], + sparse_feature_indices)): # Append the sparse_indices (in full-variable space). sparse_idx = math_ops.cast( array_ops.unique(math_ops.cast(i, dtypes.int32))[0], @@ -456,10 +458,10 @@ class SdcaModel(object): gather_ids = data_flow_ops.dynamic_partition(new_ids, p_assignments, num_partitions) - # Append these to the lists for use in the later update. 
- num_partitions_by_var.append(num_partitions) - p_assignments_by_var.append(p_assignments) - gather_ids_by_var.append(gather_ids) + # Add these into the dictionaries for use in the later update. + num_partitions_by_var[v_num] = num_partitions + p_assignments_by_var[v_num] = p_assignments + gather_ids_by_var[v_num] = gather_ids # Gather the weights from each partition. partition_gathered_weights = [] -- GitLab From 6d9bb99ea7a697e465ef66dea821a86ca94f845d Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Mon, 17 Sep 2018 17:22:40 -0700 Subject: [PATCH 0302/1357] Addressing review comments: indentation --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 4 +--- tensorflow/core/kernels/partitioned_function_ops.cc | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 593f855ea2..01e5af5f8c 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -277,9 +277,7 @@ class MklCPUAllocator : public VisitableAllocator { // max_alloc_size from large_size_allocator would be the maximum // size allocated by MklCPUAllocator. stats->max_alloc_size = l_stats.max_alloc_size; - - stats->bytes_limit = - std::max(s_stats.bytes_limit, l_stats.bytes_limit); + stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit); } void ClearStats() override { diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index ddb621967a..42f99a73e6 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -100,8 +100,8 @@ class PartitionedCallOp : public AsyncOpKernel { // We need to pass global op_registry as default_registry when creating // graph. So that graph optimization passes can lookup all possible ops // by name. 
- FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), - fbody->graph->flib_def().ToProto()); + FunctionLibraryDefinition func_lib_def( + OpRegistry::Global(), fbody->graph->flib_def().ToProto()); auto graph = tensorflow::MakeUnique(func_lib_def); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -256,7 +256,7 @@ class PartitionedCallOp : public AsyncOpKernel { << partitions.size() << " shards."; FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), - graph->flib_def().ToProto()); + graph->flib_def().ToProto()); for (const auto& partition : partitions) { std::unique_ptr subgraph(new Graph(func_lib_def)); GraphConstructorOptions opts; -- GitLab From caf40776971791d00c7dd14057125ed5dd7346d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 17:20:42 -0700 Subject: [PATCH 0303/1357] Remove unnecessary side-effect test, since HLO liveness now reports correct liveness information if a control flow computation contains side effect instructions. PiperOrigin-RevId: 213367995 --- tensorflow/compiler/xla/service/hlo_module_dce.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module_dce.cc b/tensorflow/compiler/xla/service/hlo_module_dce.cc index f7be5cae22..31d26cc51e 100644 --- a/tensorflow/compiler/xla/service/hlo_module_dce.cc +++ b/tensorflow/compiler/xla/service/hlo_module_dce.cc @@ -50,9 +50,7 @@ StatusOr RunWhileDCE(HloModule* module, HloLivenessAnalysis* liveness) { auto* while_body_root = while_body_comp->root_instruction(); if (!ShapeUtil::IsTuple(xla_while->shape()) || - while_body_root->opcode() != HloOpcode::kTuple || - while_body_comp->HasSideEffect() || - xla_while->while_condition()->HasSideEffect()) { + while_body_root->opcode() != HloOpcode::kTuple) { // Only run DCE on tuple-shaped while loops where body root is Tuple, // with no I/O instructions. 
VLOG(1) << "WhileDCE SKIP while: " << xla_while->ToString(); -- GitLab From 4338803b98cd825b0b1d810bcc51c9a79734feb6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 17:26:09 -0700 Subject: [PATCH 0304/1357] Update ops-related pbtxt files. PiperOrigin-RevId: 213368723 --- tensorflow/core/ops/ops.pbtxt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 190f6aaa5b..4ece1c8953 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -36199,9 +36199,21 @@ op { type: DT_VARIANT } input_arg { - name: "window_size" + name: "size" + type: DT_INT64 + } + input_arg { + name: "shift" + type: DT_INT64 + } + input_arg { + name: "stride" type: DT_INT64 } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } output_arg { name: "handle" type: DT_VARIANT -- GitLab From 185aa89912376d4088c22615908696cd30f9951b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 17:49:36 -0700 Subject: [PATCH 0305/1357] Eliminate VisitableAllocator. The visitor pattern is used to allow pre-registration of memory for DMA access, e.g. for fast GPU/CPU i/o and for RDMA networking. The VisitableAllocator interface was introduced to support this use some time ago, prior to SubAllocators. Memory registration works best if it's done infrequently, on large pieces of memory, rather than on every piece that's dynamically allocated/freed. This usage pattern fits the SubAllocator better than a general Allocator. This change moves memory allocation visitor access to SubAllocator and eliminates the VisitableAllocator subclass of Allocator. This change also more rigorously enforces the requirement that all Visitors be declared prior to memory allocation begining. This is accomplished by requiring that Visitors be provided to the SubAllocator constructor. This refactoring will ease an upcoming CL introducing NUMA specific CPU devices. 
It also should fix some performance pitfalls (e.g. accidental use of PoolAllocator) introduced by an earlier refactoring of ProcessState that was also in preparation for NUMA. It restores the default use of the cpu_allocator() value (i.e. no SubAllocator) by model executions that don't use allocation visitors (since visitor registration must precede the first allocation, hence can be detected at that time). PiperOrigin-RevId: 213371553 --- tensorflow/contrib/gdr/gdr_memory_manager.cc | 102 +++++------ tensorflow/contrib/verbs/rdma_mgr.cc | 81 +++------ tensorflow/contrib/verbs/rdma_mgr.h | 1 + tensorflow/contrib/verbs/verbs_server_lib.cc | 5 + tensorflow/core/BUILD | 1 - .../core/common_runtime/bfc_allocator.cc | 21 +-- .../core/common_runtime/bfc_allocator.h | 14 +- .../common_runtime/gpu/cuda_host_allocator.h | 12 +- .../common_runtime/gpu/gpu_bfc_allocator.cc | 17 +- .../common_runtime/gpu/gpu_bfc_allocator.h | 44 +++-- .../gpu/gpu_bfc_allocator_test.cc | 90 ++++++++-- .../gpu/gpu_cudamalloc_allocator.cc | 10 +- .../gpu/gpu_cudamalloc_allocator.h | 11 +- .../common_runtime/gpu/gpu_debug_allocator.cc | 20 +-- .../common_runtime/gpu/gpu_debug_allocator.h | 20 +-- .../gpu/gpu_debug_allocator_test.cc | 35 +++- .../core/common_runtime/gpu/gpu_device.cc | 64 ++++--- .../core/common_runtime/gpu/gpu_device.h | 9 +- .../common_runtime/gpu/gpu_process_state.cc | 161 +++++++++++------- .../common_runtime/gpu/gpu_process_state.h | 58 ++++--- .../common_runtime/gpu/pool_allocator_test.cc | 68 ++++++-- .../core/common_runtime/mkl_cpu_allocator.h | 50 +----- .../core/common_runtime/pool_allocator.cc | 45 ++--- .../core/common_runtime/pool_allocator.h | 27 +-- .../core/common_runtime/process_state.cc | 71 ++++++-- .../core/common_runtime/process_state.h | 15 +- .../core/common_runtime/renamed_device.h | 7 +- .../core/common_runtime/visitable_allocator.h | 79 --------- tensorflow/core/framework/allocator.cc | 20 ++- tensorflow/core/framework/allocator.h | 28 ++- 
tensorflow/core/framework/device_base.h | 10 +- tensorflow/core/framework/op_kernel.cc | 9 +- 32 files changed, 628 insertions(+), 577 deletions(-) delete mode 100644 tensorflow/core/common_runtime/visitable_allocator.h diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index 726f74c7b7..bb06f1c41c 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -138,6 +138,8 @@ class GdrMemoryManager : public RemoteMemoryManager { Device* device, DeviceContext* device_context, bool on_host, StatusCallback done) override; + static void RegMemVisitors(); + protected: Status CreateEndpoint(const string& host, const string& port, RdmaEndpointPtr& endpoint); @@ -183,35 +185,51 @@ class GdrMemoryManager : public RemoteMemoryManager { TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager); }; -// TODO(byronyi): remove this class and its registration when the default -// cpu_allocator() returns visitable allocator, or cpu_allocator() is no -// longer in use. 
-class BFCGdrAllocator : public BFCAllocator { - public: - BFCGdrAllocator() - : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, - true, "cpu_gdr_bfc") {} -}; -class BFCGdrAllocatorFactory : public AllocatorFactory { - public: - Allocator* CreateAllocator() override { return new BFCGdrAllocator; } - - virtual SubAllocator* CreateSubAllocator(int numa_node) { - return new BasicCPUAllocator(numa_node); - } -}; - -REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 102, BFCGdrAllocatorFactory); - GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) : host_(host), port_(port), listening_(nullptr, EndpointDeleter), stopped_(true), - next_key_(0) {} + next_key_(0) { + static std::once_flag flag; + std::call_once(flag, []() { RegMemVisitors(); }); +} GdrMemoryManager::~GdrMemoryManager() { close(epfd_); } +/*static*/ void GdrMemoryManager::RegMemVisitors() { + SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + GdrMemoryManager::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes); + }; + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); + +#if GOOGLE_CUDA + if (IsGDRAvailable()) { + int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; + + // Note we don't free allocated GPU memory so there is no free visitor + SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + 
GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); + LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; + } +#endif // GOOGLE_CUDA +} + Status GdrMemoryManager::Init() { epfd_ = epoll_create1(0); if (epfd_ == -1) { @@ -271,48 +289,6 @@ Status GdrMemoryManager::Init() { "cannot add server to epoll"); } - Allocator* allocators[] = { -#if GOOGLE_CUDA - GPUProcessState::singleton()->GetCUDAHostAllocator(0), -#endif // GOOGLE_CUDA - ProcessState::singleton()->GetCPUAllocator(0), - cpu_allocator(), - }; - - using namespace std::placeholders; - VisitableAllocator::Visitor alloc_visitor = - std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); - VisitableAllocator::Visitor free_visitor = - std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2); - - std::set instrumented_; - - // Host memory allocators - for (Allocator* allocator : allocators) { - auto* visitable_allocator = dynamic_cast(allocator); - CHECK(visitable_allocator) - << "is not visitable for instrumentation" << allocator->Name(); - // Make sure we don't instrument the same allocator twice - if (instrumented_.find(allocator) == std::end(instrumented_)) { - visitable_allocator->AddAllocVisitor(alloc_visitor); - visitable_allocator->AddFreeVisitor(free_visitor); - instrumented_.insert(allocator); - LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); - } - } - -#if GOOGLE_CUDA - VisitableAllocator::Visitor cuda_alloc_visitor = - std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); - if (IsGDRAvailable()) { - // Note we don't free allocated GPU memory so there is no free visitor - int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1; - GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, - cuda_alloc_visitor); - LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; - } -#endif // GOOGLE_CUDA - return Status::OK(); } diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc 
index 3cb5e61fac..2784bf124c 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include "tensorflow/contrib/verbs/grpc_verbs_client.h" #include "tensorflow/contrib/verbs/verbs_service.pb.h" -#include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/pool_allocator.h" @@ -29,6 +28,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { @@ -256,74 +256,41 @@ void MRDeleter(ibv_mr* mr) { } } -// TODO(byronyi): remove this class and its registration when the default -// cpu_allocator() returns visitable allocator, or cpu_allocator() is no -// longer in use. 
-class BFCRdmaAllocator : public BFCAllocator { - public: - BFCRdmaAllocator() - : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, - true, "cpu_rdma_bfc") {} -}; -class BFCRdmaAllocatorFactory : public AllocatorFactory { - public: - Allocator* CreateAllocator() { return new BFCRdmaAllocator; } - - SubAllocator* CreateSubAllocator(int numa_node) { - return new BasicCPUAllocator(numa_node); - } -}; - -REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory); - void RdmaMgr::InitAllocators() { - RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; + static std::once_flag flag; + std::call_once( + flag, [this]() { RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; }); +} - Allocator* allocators[] = { -#if GOOGLE_CUDA - GPUProcessState::singleton()->GetCUDAHostAllocator(0), -#endif // GOOGLE_CUDA - ProcessState::singleton()->GetCPUAllocator(0), - cpu_allocator(), +/*static*/ void RdmaMgr::RegMemVisitors() { + SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().EvictMemoryRegion(ptr, num_bytes); }; - using namespace std::placeholders; - - std::set instrumented_; - - // Host memory allocators - for (Allocator* allocator : allocators) { - VisitableAllocator::Visitor alloc_visitor = - std::bind(&RdmaMemoryMgr::InsertMemoryRegion, - &RdmaMemoryMgr::Singleton(), _1, _2, allocator->Name()); - VisitableAllocator::Visitor free_visitor = std::bind( - &RdmaMemoryMgr::EvictMemoryRegion, &RdmaMemoryMgr::Singleton(), _1, _2); - - auto* visitable_allocator = dynamic_cast(allocator); - CHECK(visitable_allocator) - << "is not visitable for instrumentation" << allocator->Name(); - // Make sure we don't instrument the same allocator twice - if (instrumented_.find(allocator) == 
std::end(instrumented_)) { - visitable_allocator->AddAllocVisitor(alloc_visitor); - visitable_allocator->AddFreeVisitor(free_visitor); - instrumented_.insert(allocator); - LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); - } - } + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); #if GOOGLE_CUDA if (IsGDRAvailable()) { // Note we don't free allocated GPU memory so there is no free visitor int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - char buf[8]; - sprintf(buf, "gpu"); - VisitableAllocator::Visitor cuda_alloc_visitor = - std::bind(&RdmaMemoryMgr::InsertMemoryRegion, - &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf)); - + SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; } #endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index 9fffc335bb..74b92cc9a6 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -39,6 +39,7 @@ class RdmaMgr { void SetupChannels(); bool ConnectivityCheck(); void InitAllocators(); + static void RegMemVisitors(); const string& local_worker() { return local_worker_; } private: diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc index 1a0b5028fe..61469686e4 100644 --- a/tensorflow/contrib/verbs/verbs_server_lib.cc +++ b/tensorflow/contrib/verbs/verbs_server_lib.cc @@ -76,8 +76,13 @@ Status VerbsServer::ChannelCacheFactory(const 
ServerDef& server_def, return Status::OK(); } +namespace { +std::once_call reg_mem_visitors_call; +} // namespace + Status VerbsServer::Init(ServiceInitFunction service_func, RendezvousMgrCreationFunction rendezvous_mgr_func) { + std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); }); Status s = GrpcServer::Init(service_func, rendezvous_mgr_func); { mutex_lock l(mu_); diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d55bd8d7ed..9bcf5b0865 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2783,7 +2783,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", "common_runtime/tracing_device.h", - "common_runtime/visitable_allocator.h", "common_runtime/process_state.h", "common_runtime/pool_allocator.h", "graph/gradients.h", diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 84c6285bbe..3843ea9e60 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -31,7 +31,7 @@ namespace tensorflow { BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, bool allow_growth, const string& name) - : suballocator_(sub_allocator), + : sub_allocator_(sub_allocator), name_(name), free_chunks_list_(kInvalidChunkHandle), next_allocation_id_(1) { @@ -72,7 +72,7 @@ BFCAllocator::~BFCAllocator() { VLOG(2) << "Number of regions allocated: " << region_manager_.regions().size(); for (const auto& region : region_manager_.regions()) { - suballocator_->Free(region.ptr(), region.memory_size()); + sub_allocator_->Free(region.ptr(), region.memory_size()); } for (BinNum b = 0; b < kNumBins; b++) { @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Try allocating. 
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(alignment, bytes); + void* mem_addr = sub_allocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(alignment, bytes); + mem_addr = sub_allocator_->Alloc(alignment, bytes); } } @@ -158,10 +158,6 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Insert the chunk into the right bin. InsertFreeChunkIntoBin(h); - // Invoke visitors on newly allocated region. - for (const auto& visitor : region_visitors_) { - visitor(mem_addr, bytes); - } return true; } @@ -490,15 +486,6 @@ void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) { InsertFreeChunkIntoBin(coalesced_chunk); } -void BFCAllocator::AddAllocVisitor(Visitor visitor) { - VLOG(1) << "AddVisitor"; - mutex_lock l(lock_); - region_visitors_.push_back(visitor); - for (const auto& region : region_manager_.regions()) { - visitor(region.ptr(), region.memory_size()); - } -} - bool BFCAllocator::TracksAllocationSizes() { return true; } size_t BFCAllocator::RequestedSize(const void* ptr) { diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index 20e1dab1d5..364071e066 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -23,7 +23,7 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/allocator_retry.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/macros.h" @@ -42,7 +42,7 @@ namespace tensorflow { // coalescing. One assumption we make is that the process using this // allocator owns pretty much all of the memory, and that nearly // all requests to allocate memory go through this interface. -class BFCAllocator : public VisitableAllocator { +class BFCAllocator : public Allocator { public: // Takes ownership of sub_allocator. BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, @@ -55,11 +55,6 @@ class BFCAllocator : public VisitableAllocator { const AllocationAttributes& allocation_attr) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - - // Does nothing, because memory is never freed. - void AddFreeVisitor(Visitor visitor) override {} - bool TracksAllocationSizes() override; size_t RequestedSize(const void* ptr) override; @@ -423,7 +418,7 @@ class BFCAllocator : public VisitableAllocator { // of the available memory. bool started_backpedal_ = false; - std::unique_ptr suballocator_; + std::unique_ptr sub_allocator_; string name_; // Structures mutable after construction @@ -435,9 +430,6 @@ class BFCAllocator : public VisitableAllocator { // Pointer to head of linked list of free Chunks ChunkHandle free_chunks_list_ GUARDED_BY(lock_); - // Called once on each region, ASAP. - std::vector region_visitors_ GUARDED_BY(lock_); - // Counter containing the next unique identifier to assign to a // newly-created chunk. 
int64 next_allocation_id_ GUARDED_BY(lock_); diff --git a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h index 636cd43575..6bd29ef775 100644 --- a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h +++ b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h @@ -26,8 +26,12 @@ namespace tensorflow { class CUDAHostAllocator : public SubAllocator { public: // Note: stream_exec cannot be null. - explicit CUDAHostAllocator(se::StreamExecutor* stream_exec) - : stream_exec_(stream_exec) { + explicit CUDAHostAllocator(se::StreamExecutor* stream_exec, int numa_node, + const std::vector& alloc_visitors, + const std::vector& free_visitors) + : SubAllocator(alloc_visitors, free_visitors), + stream_exec_(stream_exec), + numa_node_(numa_node) { CHECK(stream_exec_ != nullptr); } ~CUDAHostAllocator() override {} @@ -39,19 +43,23 @@ class CUDAHostAllocator : public SubAllocator { if (ptr == nullptr) { LOG(WARNING) << "could not allocate pinned host memory of size: " << num_bytes; + return ptr; } + VisitAlloc(ptr, numa_node_, num_bytes); } return ptr; } void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { + VisitFree(ptr, numa_node_, num_bytes); stream_exec_->HostMemoryDeallocate(ptr); } } private: se::StreamExecutor* stream_exec_; // not owned, non-null + const int numa_node_; TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc index 2d4c8d0201..44ffce77a1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc @@ -22,18 +22,15 @@ limitations under the License. 
namespace tensorflow { -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const string& name) - : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {} +GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator, + size_t total_memory, const string& name) + : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {} -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, +GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator, + size_t total_memory, const GPUOptions& gpu_options, const string& name) - : BFCAllocator( - new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), - gpu_options.per_process_gpu_memory_fraction() > 1.0 || - gpu_options.experimental().use_unified_memory()), - total_memory, gpu_options.allow_growth(), name) {} + : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(), + name) {} } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index f1cc2eace1..6b6de80734 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -31,28 +31,20 @@ limitations under the License. namespace tensorflow { -// A GPU memory allocator that implements a 'best-fit with coalescing' -// algorithm. -class GPUBFCAllocator : public BFCAllocator { - public: - // 'cuda_gpu_id' refers to the ID of the GPU device within - // the process and must reference a valid ID in the process. - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const string& name); - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const GPUOptions& gpu_options, const string& name); - virtual ~GPUBFCAllocator() {} - - TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); -}; - // Suballocator for GPU memory. 
class GPUMemAllocator : public SubAllocator { public: + // 'cuda_gpu_id' refers to the ID of the GPU device within + // the process and must reference a valid ID in the process. // Note: stream_exec cannot be null. - explicit GPUMemAllocator(se::StreamExecutor* stream_exec, - bool use_unified_memory) - : stream_exec_(stream_exec), use_unified_memory_(use_unified_memory) { + explicit GPUMemAllocator(se::StreamExecutor* stream_exec, CudaGpuId gpu_id, + bool use_unified_memory, + const std::vector& alloc_visitors, + const std::vector& free_visitors) + : SubAllocator(alloc_visitors, free_visitors), + stream_exec_(stream_exec), + gpu_id_(gpu_id), + use_unified_memory_(use_unified_memory) { CHECK(stream_exec_ != nullptr); } ~GPUMemAllocator() override {} @@ -65,12 +57,14 @@ class GPUMemAllocator : public SubAllocator { } else { ptr = stream_exec_->AllocateArray(num_bytes).opaque(); } + VisitAlloc(ptr, gpu_id_.value(), num_bytes); } return ptr; } void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { + VisitFree(ptr, gpu_id_.value(), num_bytes); if (use_unified_memory_) { stream_exec_->UnifiedMemoryDeallocate(ptr); } else { @@ -82,11 +76,25 @@ class GPUMemAllocator : public SubAllocator { private: se::StreamExecutor* stream_exec_; // not owned, non-null + const CudaGpuId gpu_id_; const bool use_unified_memory_ = false; TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator); }; +// A GPU memory allocator that implements a 'best-fit with coalescing' +// algorithm. 
+class GPUBFCAllocator : public BFCAllocator { + public: + GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory, + const string& name); + GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory, + const GPUOptions& gpu_options, const string& name); + ~GPUBFCAllocator() override {} + + TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); +}; + } // namespace tensorflow #endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_BFC_ALLOCATOR_H_ diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 67caeb3495..7112c3afd4 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -46,7 +47,11 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use, } TEST(GPUBFCAllocatorTest, NoDups) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); // Allocate a lot of raw pointers @@ -75,7 +80,11 @@ TEST(GPUBFCAllocatorTest, NoDups) { } TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator 
a(sub_allocator, 1 << 30, "GPU_0_bfc"); // Allocate 256 raw pointers of sizes between 100 bytes and about // a meg random::PhiloxRandom philox(123, 17); @@ -133,7 +142,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { } TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); float* first_ptr = a.Allocate(1024); @@ -168,18 +181,30 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { } TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); float* ptr = a.Allocate(0); EXPECT_EQ(nullptr, ptr); } TEST(GPUBFCAllocatorTest, TracksSizes) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, 
a.AllocatedSize(t1)); @@ -187,8 +212,12 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { } TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) { + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); // Configure a 1MiB byte limit - GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc"); + GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc"); float* first_ptr = a.Allocate(1 << 6); float* second_ptr = a.Allocate(1 << 20); @@ -203,7 +232,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { options.set_allow_growth(true); // Max of 2GiB, but starts out small. - GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1LL << 31, options, "GPU_0_bfc"); // Allocate 10 raw pointers of sizes between 100 bytes and about // 64 megs.
@@ -264,8 +297,15 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { } TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { - GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); - GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1UL << 60, "GPU_0_bfc"); + sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator b(sub_allocator, 1UL << 60, "GPU_0_bfc"); void* amem = a.AllocateRaw(1, 1); void* bmem = b.AllocateRaw(1, 1 << 30); a.DeallocateRaw(amem); @@ -273,7 +313,11 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { } static void BM_Allocation(int iters) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 524288, 512, 1048576, 10485760, 104857600, @@ -289,7 +333,11 @@ static void BM_Allocation(int iters) { BENCHMARK(BM_Allocation); static void BM_AllocationThreaded(int iters, int num_threads) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc"); thread::ThreadPool pool(Env::Default(), "test", num_threads); std::atomic_int_fast32_t count(iters); mutex 
done_lock; @@ -325,7 +373,11 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16); // A more complex benchmark that defers deallocation of an object for // "delay" allocations. static void BM_AllocationDelayed(int iters, int delay) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 4096, 512, 1024, 1024}; int size_index = 0; @@ -363,7 +415,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { // only methods inside this class can access private members of BFCAllocator. void TestBinDebugInfo() { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); std::vector initial_ptrs; std::vector initial_ptrs_allocated_sizes; @@ -441,7 +497,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { } void TestLog2FloorNonZeroSlow() { - GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 /* total_memory */, "GPU_0_bfc"); EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0)); EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1)); EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc index 
934a57a5fb..8e14f1ea75 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc @@ -27,7 +27,7 @@ limitations under the License. namespace tensorflow { -GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator, +GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -60,14 +60,6 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) { #endif // GOOGLE_CUDA } -void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) { - return base_allocator_->AddAllocVisitor(visitor); -} - -void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) { - return base_allocator_->AddFreeVisitor(visitor); -} - bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 856fdc34b4..3d1d0ef481 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -19,7 +19,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -29,20 +29,17 @@ namespace tensorflow { // An allocator that wraps a GPU allocator and adds debugging // functionality that verifies that users do not write outside their // allocated memory. 
-class GPUcudaMallocAllocator : public VisitableAllocator { +class GPUcudaMallocAllocator : public Allocator { public: - explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + explicit GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); ~GPUcudaMallocAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - void AddFreeVisitor(Visitor visitor) override; bool TracksAllocationSizes() override; private: - VisitableAllocator* base_allocator_ = nullptr; // owned + Allocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index e4c834b30d..6bad66dcec 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -73,7 +73,7 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { // ----------------------------------------------------------------------------- // GPUDebugAllocator // ----------------------------------------------------------------------------- -GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator, +GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -111,14 +111,6 @@ void GPUDebugAllocator::DeallocateRaw(void* ptr) { base_allocator_->DeallocateRaw(ptr); } -void GPUDebugAllocator::AddAllocVisitor(Visitor visitor) { - return base_allocator_->AddAllocVisitor(visitor); -} - -void GPUDebugAllocator::AddFreeVisitor(Visitor visitor) { - return base_allocator_->AddFreeVisitor(visitor); -} - bool GPUDebugAllocator::TracksAllocationSizes() { return 
true; } size_t GPUDebugAllocator::RequestedSize(const void* ptr) { @@ -158,7 +150,7 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) { // ----------------------------------------------------------------------------- // GPUNanResetAllocator // ----------------------------------------------------------------------------- -GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator, +GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -200,14 +192,6 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) { base_allocator_->DeallocateRaw(ptr); } -void GPUNanResetAllocator::AddAllocVisitor(Visitor visitor) { - return base_allocator_->AddAllocVisitor(visitor); -} - -void GPUNanResetAllocator::AddFreeVisitor(Visitor visitor) { - return base_allocator_->AddFreeVisitor(visitor); -} - size_t GPUNanResetAllocator::RequestedSize(const void* ptr) { return base_allocator_->RequestedSize(ptr); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index 0f9b72040c..0f27ff4384 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -21,7 +21,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -31,16 +31,13 @@ namespace tensorflow { // An allocator that wraps a GPU allocator and adds debugging // functionality that verifies that users do not write outside their // allocated memory. 
-class GPUDebugAllocator : public VisitableAllocator { +class GPUDebugAllocator : public Allocator { public: - explicit GPUDebugAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + explicit GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); ~GPUDebugAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - void AddFreeVisitor(Visitor visitor) override; bool TracksAllocationSizes() override; size_t RequestedSize(const void* ptr) override; size_t AllocatedSize(const void* ptr) override; @@ -53,7 +50,7 @@ class GPUDebugAllocator : public VisitableAllocator { bool CheckFooter(void* ptr); private: - VisitableAllocator* base_allocator_ = nullptr; // owned + Allocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. @@ -63,23 +60,20 @@ class GPUDebugAllocator : public VisitableAllocator { // An allocator that wraps a GPU allocator and resets the memory on // allocation and free to 'NaN', helping to identify cases where the // user forgets to initialize the memory. 
-class GPUNanResetAllocator : public VisitableAllocator { +class GPUNanResetAllocator : public Allocator { public: - explicit GPUNanResetAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + explicit GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); ~GPUNanResetAllocator() override; string Name() override { return "gpu_nan_reset"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - void AddFreeVisitor(Visitor visitor) override; size_t RequestedSize(const void* ptr) override; size_t AllocatedSize(const void* ptr) override; void GetStats(AllocatorStats* stats) override; void ClearStats() override; private: - VisitableAllocator* base_allocator_ = nullptr; // owned + Allocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index 236a0afa0b..98283cd846 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -35,7 +35,10 @@ namespace { TEST(GPUDebugAllocatorTest, OverwriteDetection_None) { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -59,7 +62,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) { EXPECT_DEATH( { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + 
GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + cuda_gpu_id, false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -92,7 +98,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { EXPECT_DEATH( { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + cuda_gpu_id, false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -122,7 +131,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { TEST(GPUDebugAllocatorTest, ResetToNan) { const CudaGpuId cuda_gpu_id(0); - GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -163,8 +175,11 @@ TEST(GPUDebugAllocatorTest, ResetToNan) { TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { const CudaGpuId cuda_gpu_id(0); // NaN reset must be the outer-most allocator. 
+ GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -205,15 +220,21 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { TEST(GPUDebugAllocatorTest, TracksSizes) { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUDebugAllocatorTest, AllocatedVsRequested) { const CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id), cuda_gpu_id); float* t1 = a.Allocate(1); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 2763ac0d4a..50e61b7e00 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -41,7 +41,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/common_runtime/local_device.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" @@ -285,6 +284,38 @@ BaseGPUDevice::~BaseGPUDevice() { for (auto ctx : device_contexts_) ctx->Unref(); } +// This should be idempotent if already initialized. +Status BaseGPUDevice::InitScratchBuffers() { + mutex_lock l(scratch_init_mutex_); + if (scratch_.size() < max_streams_) { + for (int i = 0; i < max_streams_; i++) { + DCHECK(streams_[i]); + if (scratch_.size() > i && scratch_[i]) continue; + size_t scratch_buffer_size = + Eigen::kCudaScratchSize + sizeof(unsigned int); + void* scratch_buffer = gpu_allocator_->AllocateRaw( + Allocator::kAllocatorAlignment, scratch_buffer_size); + if (scratch_buffer == nullptr) { + return errors::FailedPrecondition( + "Failed to allocate scratch buffer for device ", + tf_gpu_id_.value()); + } + se::DeviceMemory mem( + se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); + + bool ok = executor_->SynchronousMemZero( + &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); + if (!ok) { + return errors::FailedPrecondition( + "Failed to memcopy into scratch buffer for device ", + tf_gpu_id_.value()); + } + scratch_.push_back(static_cast(scratch_buffer)); + } + } + return Status::OK(); +} + Status BaseGPUDevice::Init(const SessionOptions& options) { auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_); if (!executor_status.status().ok()) { @@ -303,27 +334,6 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { for (int i = 0; i < max_streams_; i++) { streams_.push_back(StreamGroupFactory::Global().GetOrCreate( tf_gpu_id_, i, executor_, options.config.gpu_options())); - - size_t scratch_buffer_size = 
Eigen::kCudaScratchSize + sizeof(unsigned int); - void* scratch_buffer = gpu_allocator_->AllocateRaw( - Allocator::kAllocatorAlignment, scratch_buffer_size); - if (scratch_buffer == nullptr) { - return errors::FailedPrecondition( - "Failed to allocate scratch buffer for device ", tf_gpu_id_.value()); - } - scratch_.push_back(static_cast(scratch_buffer)); - - se::DeviceMemory mem( - se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); - - bool ok = executor_->SynchronousMemZero( - &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); - if (!ok) { - return errors::FailedPrecondition( - "Failed to memcopy into scratch buffer for device ", - tf_gpu_id_.value()); - } - device_contexts_.push_back(new GPUDeviceContext( i, streams_.back()->compute, streams_.back()->host_to_device, streams_.back()->device_to_host, streams_.back()->device_to_device)); @@ -867,10 +877,11 @@ PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() { return new ConcretePerOpGpuDevice(); } -void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, - PerOpGpuDevice* device, - DeviceContext* dc, - Allocator* allocator) { +Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, + PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) { + TF_RETURN_IF_ERROR(InitScratchBuffers()); if (dc) { const GPUDeviceContext* gpu_dc = static_cast(dc); const int stream_id = gpu_dc->stream_id(); @@ -881,6 +892,7 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, } else { ReinitializeDevice(context, device, 0, allocator); } + return Status::OK(); } Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr, diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 56d03d7a8c..b3eea55758 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -86,8 +86,9 @@ class BaseGPUDevice : public LocalDevice { // The caller owns the returned 
device. PerOpGpuDevice* MakeGpuDevice() override; - void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, Allocator* allocator) override; + Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) override; // Returns the CUDA GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. @@ -125,6 +126,7 @@ class BaseGPUDevice : public LocalDevice { class StreamGroupFactory; gtl::InlinedVector streams_; + mutex scratch_init_mutex_; gtl::InlinedVector scratch_; std::vector device_contexts_; GpuDeviceInfo* gpu_device_info_ = nullptr; @@ -135,6 +137,9 @@ class BaseGPUDevice : public LocalDevice { std::unique_ptr em_; std::unique_ptr thread_pool_; + // Initialize scratch buffers used by Eigen. + Status InitScratchBuffers(); + void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index b18688174d..9ec740fabe 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -76,12 +76,16 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) { // This function is defined for debugging problems with the allocators. GPUProcessState::~GPUProcessState() { CHECK_EQ(this, instance_); - for (auto p : gpu_allocators_) { - delete p; - } instance_ = nullptr; } +int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) { + // Return the NUMA node associated with the GPU's StreamExecutor. 
+ se::StreamExecutor* se = + GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie(); + return se->GetDeviceDescription().numa_node(); +} + Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, TfGpuId tf_gpu_id, size_t total_bytes) { @@ -93,13 +97,10 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, if (tf_gpu_id.value() >= static_cast(gpu_allocators_.size())) { gpu_allocators_.resize(tf_gpu_id.value() + 1); - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) - gpu_al_.resize(tf_gpu_id.value() + 1); } - if (gpu_allocators_[tf_gpu_id.value()] == nullptr) { - VisitableAllocator* gpu_allocator; - + AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()]; + if (allocator_parts.allocator.get() == nullptr) { // Validate allocator types. if (!allocator_type.empty() && allocator_type != "BFC") { LOG(ERROR) << "Invalid allocator type: " << allocator_type; @@ -108,8 +109,17 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, CudaGpuId cuda_gpu_id; TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - gpu_allocator = - new GPUBFCAllocator(cuda_gpu_id, total_bytes, options, + int bus_id = BusIdForGPU(tf_gpu_id); + while (bus_id >= gpu_visitors_.size()) { + gpu_visitors_.push_back({}); + } + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + (options.per_process_gpu_memory_fraction() > 1.0 || + options.experimental().use_unified_memory()), + gpu_visitors_[bus_id], {}); + Allocator* gpu_allocator = + new GPUBFCAllocator(sub_allocator, total_bytes, options, strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc")); // If true, checks for memory overwrites by writing @@ -123,34 +133,25 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, // **WARNING** probably will not work in a multi-gpu scenario gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id); } - 
gpu_allocators_[tf_gpu_id.value()] = gpu_allocator; - - // If there are any pending AllocVisitors for this bus, add - // them now. - se::StreamExecutor* se = - GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie(); - int bus_id = se->GetDeviceDescription().numa_node(); - if (bus_id >= 0 && bus_id < static_cast(gpu_visitors_.size())) { - for (const auto& v : gpu_visitors_[bus_id]) { - gpu_allocator->AddAllocVisitor(v); - } - } + + Allocator* recording_allocator = nullptr; if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::GPU; md.dev_index = cuda_gpu_id.value(); md.gpu_registered = false; md.nic_registered = true; - if (static_cast(gpu_al_.size()) <= tf_gpu_id.value()) { - gpu_al_.resize(tf_gpu_id.value() + 1); - } - gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator( + recording_allocator = new internal::RecordingAllocator( &process_state_->mem_desc_map_, gpu_allocator, md, &mu_); } + allocator_parts = {std::unique_ptr(gpu_allocator), sub_allocator, + std::unique_ptr(recording_allocator)}; + } + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { + return allocator_parts.recording_allocator.get(); + } else { + return allocator_parts.allocator.get(); } - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) - return gpu_al_[tf_gpu_id.value()]; - return gpu_allocators_[tf_gpu_id.value()]; #else LOG(FATAL) << "GPUAllocator unavailable. 
Not compiled with --config=cuda."; return nullptr; @@ -172,11 +173,12 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { tf_shared_lock lock(mu_); if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types && - static_cast(cuda_al_.size()) > 0) { - return cuda_al_[0]; + !cuda_host_allocators_.empty() && + cuda_host_allocators_[0].recording_allocator != nullptr) { + return cuda_host_allocators_[0].recording_allocator.get(); } if (static_cast(cuda_host_allocators_.size()) > numa_node) { - return cuda_host_allocators_[0]; + return cuda_host_allocators_[0].allocator.get(); } } @@ -190,7 +192,7 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { // it knows is valid. se::StreamExecutor* se = nullptr; for (int i = 0; i < static_cast(gpu_allocators_.size()); ++i) { - if (gpu_allocators_[i] != nullptr) { + if (gpu_allocators_[i].allocator != nullptr) { se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie(); break; } @@ -199,6 +201,15 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { CHECK_NE(nullptr, se); while (static_cast(cuda_host_allocators_.size()) <= numa_node) { + while (cuda_host_alloc_visitors_.size() <= numa_node) { + cuda_host_alloc_visitors_.push_back({}); + } + while (cuda_host_free_visitors_.size() <= numa_node) { + cuda_host_free_visitors_.push_back({}); + } + SubAllocator* sub_allocator = new CUDAHostAllocator( + se, numa_node, cuda_host_alloc_visitors_[numa_node], + cuda_host_free_visitors_[numa_node]); // TODO(zheng-xq): evaluate whether 64GB by default is the best choice. 
int64 cuda_host_mem_limit_in_mb = -1; Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB", @@ -208,62 +219,92 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message(); } int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20); - VisitableAllocator* allocator = - new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit, + Allocator* allocator = + new BFCAllocator(sub_allocator, cuda_host_mem_limit, true /*allow_growth*/, "cuda_host_bfc" /*name*/); - if (LogMemory::IsEnabled()) { + if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) { // Wrap the allocator to track allocation ids for better logging // at the cost of performance. - allocator = new TrackingVisitableAllocator(allocator, true); + allocator = new TrackingAllocator(allocator, true); } - cuda_host_allocators_.push_back(allocator); + cuda_host_allocators_.push_back({std::unique_ptr(allocator), + sub_allocator, + std::unique_ptr(nullptr)}); + AllocatorParts& allocator_parts = cuda_host_allocators_.back(); if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::CPU; md.dev_index = 0; md.gpu_registered = true; md.nic_registered = false; - cuda_al_.push_back(new internal::RecordingAllocator( - &process_state_->mem_desc_map_, cuda_host_allocators_.back(), md, - &mu_)); + allocator_parts.recording_allocator.reset( + new internal::RecordingAllocator(&process_state_->mem_desc_map_, + allocator_parts.allocator.get(), md, + &mu_)); } } - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) - return cuda_al_[0]; - return cuda_host_allocators_[0]; + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { + return cuda_host_allocators_[0].recording_allocator.get(); + } else { + return cuda_host_allocators_[0].allocator.get(); + } } void GPUProcessState::AddGPUAllocVisitor(int bus_id, - const 
AllocVisitor& visitor) { - CHECK(process_state_); + const SubAllocator::Visitor& visitor) { #if GOOGLE_CUDA mutex_lock lock(mu_); - for (int i = 0; i < static_cast(gpu_allocators_.size()); ++i) { - se::StreamExecutor* se = - GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie(); - if (gpu_allocators_[i] && - (se->GetDeviceDescription().numa_node() + 1) == bus_id) { - gpu_allocators_[i]->AddAllocVisitor(visitor); - } - } + CHECK(gpu_allocators_.empty()) // Crash OK + << "AddGPUAllocVisitor must be called before " + "first call to GetGPUAllocator."; while (bus_id >= static_cast(gpu_visitors_.size())) { - gpu_visitors_.push_back(std::vector()); + gpu_visitors_.push_back(std::vector()); } gpu_visitors_[bus_id].push_back(visitor); #endif // GOOGLE_CUDA } +void GPUProcessState::AddCUDAHostAllocVisitor( + int numa_node, const SubAllocator::Visitor& visitor) { +#if GOOGLE_CUDA + mutex_lock lock(mu_); + CHECK(cuda_host_allocators_.empty()) // Crash OK + << "AddCUDAHostAllocVisitor must be called before " + "first call to GetCUDAHostAllocator."; + while (numa_node >= static_cast(cuda_host_alloc_visitors_.size())) { + cuda_host_alloc_visitors_.push_back(std::vector()); + } + cuda_host_alloc_visitors_[numa_node].push_back(visitor); +#endif // GOOGLE_CUDA +} + +void GPUProcessState::AddCUDAHostFreeVisitor( + int numa_node, const SubAllocator::Visitor& visitor) { +#if GOOGLE_CUDA + mutex_lock lock(mu_); + CHECK(cuda_host_allocators_.empty()) // Crash OK + << "AddCUDAHostFreeVisitor must be called before " + "first call to GetCUDAHostAllocator."; + while (numa_node >= static_cast(cuda_host_free_visitors_.size())) { + cuda_host_free_visitors_.push_back(std::vector()); + } + cuda_host_free_visitors_[numa_node].push_back(visitor); +#endif // GOOGLE_CUDA +} + void GPUProcessState::TestOnlyReset() { - process_state_->ProcessState::TestOnlyReset(); + if (process_state_) { + process_state_->ProcessState::TestOnlyReset(); + } { mutex_lock lock(mu_); gpu_device_enabled_ = false; + 
gpu_allocators_.clear(); gpu_visitors_.clear(); - gtl::STLDeleteElements(&gpu_allocators_); - gtl::STLDeleteElements(&cuda_host_allocators_); - gtl::STLDeleteElements(&gpu_al_); - gtl::STLDeleteElements(&cuda_al_); + cuda_host_allocators_.clear(); + cuda_host_alloc_visitors_.clear(); + cuda_host_free_visitors_.clear(); } } diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h index cb41c3c6bd..43e9a31660 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h @@ -32,7 +32,6 @@ limitations under the License. namespace tensorflow { class Allocator; -class VisitableAllocator; class PoolAllocator; // Singleton that manages per-process state when GPUs are present. @@ -72,18 +71,30 @@ class GPUProcessState { virtual Allocator* GetCUDAHostAllocator(int numa_node); - // Registers a function to be called once on every new Region - // allocated by every GPURegionAllocator proximate to the specified - // bus. The AllocVisitor is provided with a memory pointer and the - // size of the area it identifies. The pointer is not guaranteed to - // be valid after the call terminates. The intention is for this - // interface to be used for network device memory registration. - // "bus_id" is platform-specific. On many platforms it - // should be 0. On machines with multiple PCIe buses, it should be - // the index of one of the PCIe buses. If the bus_id is invalid, - // results are undefined. - typedef std::function AllocVisitor; - virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor); + // Registers a Visitor to be invoked on new chunks of memory allocated by the + // SubAllocator of every GPU proximate to the specified bus. The AllocVisitor + // is provided with a memory pointer, a GPU id, and the size of the area it + // identifies. The pointer is not guaranteed to be valid after the call + // terminates. 
The intention is for this interface to be used for network + // device memory registration. "bus_id" is platform-specific. On many + // platforms it should be 0. On machines with multiple PCIe buses, it should + // be the index of one of the PCIe buses (maybe the NUMA node at which the + // PCIe is rooted). If the bus_id is invalid, results are undefined. + virtual void AddGPUAllocVisitor(int bus_id, + const SubAllocator::Visitor& visitor); + + // Registers a Visitor to be invoked on new chunks of memory allocated by + // the SubAllocator of the CUDAHostAllocator for the given numa_node. + virtual void AddCUDAHostAllocVisitor(int numa_node, + const SubAllocator::Visitor& visitor); + + // Registers a Visitor to be invoked on each chunk handed back for freeing to + // the SubAllocator of the CUDAHostAllocator for the given numa_node. + virtual void AddCUDAHostFreeVisitor(int numa_node, + const SubAllocator::Visitor& visitor); + + // Returns bus_id for the given GPU id. + virtual int BusIdForGPU(TfGpuId tf_gpu_id); protected: GPUProcessState(); @@ -103,16 +114,21 @@ class GPUProcessState { mutex mu_; - std::vector gpu_allocators_ GUARDED_BY(mu_); - std::vector> gpu_visitors_ GUARDED_BY(mu_); - std::vector cuda_host_allocators_ GUARDED_BY(mu_); + struct AllocatorParts { + std::unique_ptr allocator; + SubAllocator* sub_allocator; // owned by allocator + std::unique_ptr recording_allocator; + }; + std::vector gpu_allocators_ GUARDED_BY(mu_); + std::vector> gpu_visitors_ GUARDED_BY(mu_); - virtual ~GPUProcessState(); + std::vector cuda_host_allocators_ GUARDED_BY(mu_); + std::vector> cuda_host_alloc_visitors_ + GUARDED_BY(mu_); + std::vector> cuda_host_free_visitors_ + GUARDED_BY(mu_); - // Optional RecordingAllocators that wrap the corresponding - // Allocators for runtime attribute use analysis. 
- std::vector gpu_al_ GUARDED_BY(mu_); - std::vector cuda_al_ GUARDED_BY(mu_); + virtual ~GPUProcessState(); friend class GPUDeviceTest; }; diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc index 583bff2c07..6b2f6547b0 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc @@ -31,7 +31,8 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) { 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/)); @@ -49,7 +50,8 @@ TEST(PoolAllocatorTest, ZeroSizePool) { 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); EXPECT_EQ(0, pool.get_from_pool_count()); @@ -82,7 +84,8 @@ TEST(PoolAllocatorTest, Alignment) { 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); for (int i = 0; i < 16; ++i) { size_t alignment = 1 << i; @@ -97,8 +100,8 @@ TEST(PoolAllocatorTest, Alignment) { TEST(PoolAllocatorTest, AutoResize) { PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/, - new BasicCPUAllocator(0 /*numa_node*/), new NoopRounder, - "pool"); + new BasicCPUAllocator(0 /*numa_node*/, {}, {}), + new NoopRounder, "pool"); // Alloc/dealloc 10 sizes just a few times, confirming pool size // stays at 2. 
@@ -123,14 +126,32 @@ TEST(PoolAllocatorTest, AutoResize) { } TEST(PoolAllocatorTest, CudaHostAllocator) { + int alloc_count = 0; + int64 alloc_size = 0; + SubAllocator::Visitor alloc_visitor = + [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) { + ++alloc_count; + alloc_size += size; + }; + int free_count = 0; + int64 free_size = 0; + SubAllocator::Visitor free_visitor = + [&free_count, &free_size](void* ptr, int numa_node, int64 size) { + ++free_count; + free_size += size; + }; se::Platform* platform = se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); - PoolAllocator pool( - 2 /*pool_size_limit*/, false /*auto_resize*/, - new CUDAHostAllocator( - platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), - new NoopRounder, "pool"); + CUDAHostAllocator* sub_allocator = new CUDAHostAllocator( + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) + .ValueOrDie(), + 0 /*numa_node*/, {alloc_visitor}, {free_visitor}); + PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/, + sub_allocator, new NoopRounder, "pool"); + EXPECT_EQ(0, alloc_count); + EXPECT_EQ(0, alloc_size); + EXPECT_EQ(0, free_count); + EXPECT_EQ(0, free_size); // Repeatedly Get a 16-byte value, confirming that there's only // one real allocation. @@ -138,6 +159,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { EXPECT_EQ(0, pool.get_from_pool_count()); EXPECT_EQ(1, pool.allocated_count()); EXPECT_NE(nullptr, p1_16); + EXPECT_EQ(1, alloc_count); // Underlying suballoc of 16 bytes + // Each suballocation includes a 16B ChunkPrefix. + static const int kChunkPrefixSize = 16; + EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size); pool.DeallocateRaw(p1_16); // Pool contents {16} EXPECT_EQ(1, pool.put_count()); @@ -148,6 +173,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { pool.DeallocateRaw(p2_16); // Put it back. 
// Pool contents {16} EXPECT_EQ(2, pool.put_count()); + EXPECT_EQ(1, alloc_count); // Underlying suballoc of 16 bytes + EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(0, free_count); // Get two more values of different sizes. void* p3_4 = pool.AllocateRaw(4, 4); @@ -160,6 +188,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { void* p4_2 = pool.AllocateRaw(4, 2); // Get a third size buffer. EXPECT_NE(nullptr, p4_2); EXPECT_EQ(0, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(0, free_count); // The pool is full: when we put back p4_2, the 16-byte buffer // should be evicted since it was least recently inserted. @@ -167,6 +198,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { // Pool contents {2, 4} EXPECT_EQ(4, pool.put_count()); EXPECT_EQ(1, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(1, free_count); + EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size); // Re-getting and putting size 2 or 4 should not alter pool size or // num-evicted. 
@@ -180,12 +215,20 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { EXPECT_EQ(6, pool.put_count()); EXPECT_EQ(3, pool.allocated_count()); EXPECT_EQ(1, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(1, free_count); + EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size); pool.Clear(); EXPECT_EQ(0, pool.get_from_pool_count()); EXPECT_EQ(0, pool.put_count()); EXPECT_EQ(0, pool.allocated_count()); EXPECT_EQ(0, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(3, free_count); + EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size); } TEST(PoolAllocatorTest, Pow2Rounder) { @@ -206,7 +249,8 @@ TEST(PoolAllocatorTest, Name) { 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); EXPECT_EQ("pool", pool.Name()); } diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index df9c3a686c..538a70668a 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -23,12 +23,11 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" -#include "tensorflow/core/framework/allocator_registry.h" +#include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" -#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/numa.h" #ifndef INTEL_MKL_DNN_ONLY #include "i_malloc.h" @@ -40,20 +39,16 @@ typedef unsigned int uint; namespace tensorflow { -class MklSubAllocator : public SubAllocator { +class MklSubAllocator : public BasicCPUAllocator { public: + MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {} ~MklSubAllocator() override {} - - void* Alloc(size_t alignment, size_t num_bytes) override { - return port::AlignedMalloc(num_bytes, alignment); - } - void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); } }; // CPU allocator that handles small-size allocations by calling // suballocator directly. Mostly, it is just a wrapper around a suballocator // (that calls malloc and free directly) with support for bookkeeping. -class MklSmallSizeAllocator : public VisitableAllocator { +class MklSmallSizeAllocator : public Allocator { public: MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory, const string& name) @@ -75,10 +70,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { CHECK(map_.insert(map_val).second); // Increment statistics for small-size allocations. IncrementStats(num_bytes); - // Call alloc visitors. - for (const auto& visitor : alloc_visitors_) { - visitor(ptr, num_bytes); - } } return ptr; } @@ -94,9 +85,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { if (map_iter != map_.end()) { // Call free visitors. 
size_t dealloc_bytes = map_iter->second; - for (const auto& visitor : free_visitors_) { - visitor(ptr, dealloc_bytes); - } sub_allocator_->Free(ptr, dealloc_bytes); DecrementStats(dealloc_bytes); map_.erase(map_iter); @@ -121,16 +109,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { stats_.Clear(); } - void AddAllocVisitor(Visitor visitor) override { - mutex_lock l(mutex_); - alloc_visitors_.push_back(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - mutex_lock l(mutex_); - free_visitors_.push_back(visitor); - } - private: // Increment statistics for the allocator handling small allocations. inline void IncrementStats(size_t alloc_size) @@ -163,15 +141,11 @@ class MklSmallSizeAllocator : public VisitableAllocator { // Allocator stats for small allocs AllocatorStats stats_ GUARDED_BY(mutex_); - - // Visitors - std::vector alloc_visitors_ GUARDED_BY(mutex_); - std::vector free_visitors_ GUARDED_BY(mutex_); }; /// CPU allocator for MKL that wraps BFC allocator and intercepts /// and redirects memory allocation calls from MKL. 
-class MklCPUAllocator : public VisitableAllocator { +class MklCPUAllocator : public Allocator { public: // Constructor and other standard functions @@ -284,16 +258,6 @@ class MklCPUAllocator : public VisitableAllocator { large_size_allocator_->ClearStats(); } - void AddAllocVisitor(Visitor visitor) override { - small_size_allocator_->AddAllocVisitor(visitor); - large_size_allocator_->AddAllocVisitor(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - small_size_allocator_->AddFreeVisitor(visitor); - large_size_allocator_->AddFreeVisitor(visitor); - } - private: // Hooks provided by this allocator for memory allocation routines from MKL @@ -330,7 +294,7 @@ class MklCPUAllocator : public VisitableAllocator { // The alignment that we need for the allocations static constexpr const size_t kAlignment = 64; - VisitableAllocator* large_size_allocator_; // owned by this class + Allocator* large_size_allocator_; // owned by this class MklSmallSizeAllocator* small_size_allocator_; // owned by this class. 
SubAllocator* sub_allocator_; // not owned by this class diff --git a/tensorflow/core/common_runtime/pool_allocator.cc b/tensorflow/core/common_runtime/pool_allocator.cc index fdad8de8d6..66dc8f3322 100644 --- a/tensorflow/core/common_runtime/pool_allocator.cc +++ b/tensorflow/core/common_runtime/pool_allocator.cc @@ -40,8 +40,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize, auto_resize_(auto_resize), pool_size_limit_(pool_size_limit), allocator_(allocator), - size_rounder_(size_rounder), - allocation_begun_(false) { + size_rounder_(size_rounder) { if (auto_resize) { CHECK_LT(size_t{0}, pool_size_limit) << "size limit must be > 0 if auto_resize is true."; @@ -93,7 +92,6 @@ ChunkPrefix* FindPrefix(void* user_ptr) { } // namespace void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { - if (!allocation_begun_) allocation_begun_ = true; if (num_bytes == 0) return nullptr; // If alignment is larger than kPoolAlignment, increase num_bytes so that we @@ -129,9 +127,6 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { return PrepareChunk(r, alignment, num_bytes); } else { void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes); - for (const auto& v : alloc_visitors_) { - v(ptr, num_bytes); - } return PrepareChunk(ptr, alignment, num_bytes); } } @@ -141,9 +136,6 @@ void PoolAllocator::DeallocateRaw(void* ptr) { ChunkPrefix* cp = FindPrefix(ptr); CHECK_LE((void*)cp, (void*)ptr); if (!has_size_limit_ && !auto_resize_) { - for (const auto& v : free_visitors_) { - v(cp, cp->num_bytes); - } allocator_->Free(cp, cp->num_bytes); } else { mutex_lock lock(mutex_); @@ -164,9 +156,6 @@ void PoolAllocator::Clear() { mutex_lock lock(mutex_); for (auto iter : pool_) { PtrRecord* pr = iter.second; - for (const auto& v : free_visitors_) { - v(pr->ptr, pr->num_bytes); - } allocator_->Free(pr->ptr, pr->num_bytes); delete pr; } @@ -221,9 +210,6 @@ void PoolAllocator::EvictOne() { DCHECK(iter != pool_.end()); } 
pool_.erase(iter); - for (const auto& v : free_visitors_) { - v(prec->ptr, prec->num_bytes); - } allocator_->Free(prec->ptr, prec->num_bytes); delete prec; ++evicted_count_; @@ -269,28 +255,19 @@ void PoolAllocator::EvictOne() { } } -void PoolAllocator::AddAllocVisitor(Visitor visitor) { - mutex_lock lock(mutex_); - CHECK(!allocation_begun_) - << "AddAllocVisitor may not be called after pool allocation " - << "has begun."; - alloc_visitors_.push_back(visitor); -} - -void PoolAllocator::AddFreeVisitor(Visitor visitor) { - mutex_lock lock(mutex_); - CHECK(!allocation_begun_) - << "AddFreeVisitor may not be called after pool allocation " - << "has begun."; - free_visitors_.push_back(visitor); -} - void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) { - return port::AlignedMalloc(num_bytes, static_cast(alignment)); + void* ptr = nullptr; + if (num_bytes > 0) { + ptr = port::AlignedMalloc(num_bytes, static_cast(alignment)); + VisitAlloc(ptr, numa_node_, num_bytes); + } + return ptr; } void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) { - port::AlignedFree(ptr); + if (num_bytes > 0) { + VisitFree(ptr, numa_node_, num_bytes); + port::AlignedFree(ptr); + } } - } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h index 607734445b..5b4623ba10 100644 --- a/tensorflow/core/common_runtime/pool_allocator.h +++ b/tensorflow/core/common_runtime/pool_allocator.h @@ -16,14 +16,13 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ -// Simple LRU pool allocators for various flavors of CPU RAM that -// implement the VisitableAllocator interface. +// Simple LRU pool allocators for various flavors of CPU RAM. 
#include #include #include #include -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -41,7 +40,7 @@ class RoundUpInterface { // Size-limited pool of memory buffers obtained from a SubAllocator // instance. Pool eviction policy is LRU. -class PoolAllocator : public VisitableAllocator { +class PoolAllocator : public Allocator { public: // "pool_size_limit" is the maximum number of returned, re-usable // memory buffers to keep in the pool. If pool_size_limit == 0, the @@ -64,14 +63,6 @@ class PoolAllocator : public VisitableAllocator { void DeallocateRaw(void* ptr) override; - // REQUIRES: The following functions may only be called prior - // to the first Allocate*() call. Once allocation has begun, it is - // illegal to register another visitor. - - void AddAllocVisitor(Visitor visitor) override; - - void AddFreeVisitor(Visitor visitor) override; - // Allocate an unused memory region of size "num_bytes". Fetch from // the pool if available, otherwise call allocator_. void* Get(size_t num_bytes); @@ -141,12 +132,6 @@ class PoolAllocator : public VisitableAllocator { int64 put_count_ GUARDED_BY(mutex_) = 0; int64 allocated_count_ GUARDED_BY(mutex_) = 0; int64 evicted_count_ GUARDED_BY(mutex_) = 0; - // Write access to these is guarded by mutex_, but not read - // access. They may only be modified prior to the first - // allocation. Later attempts to modify will fail. - std::vector alloc_visitors_; - std::vector free_visitors_; - std::atomic allocation_begun_; }; // Do-nothing rounder. Passes through sizes unchanged. @@ -166,7 +151,9 @@ class Pow2Rounder : public RoundUpInterface { class BasicCPUAllocator : public SubAllocator { public: // Argument numa_node is currently ignored. 
- explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {} + BasicCPUAllocator(int numa_node, const std::vector& alloc_visitors, + const std::vector& free_visitors) + : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {} ~BasicCPUAllocator() override {} @@ -176,6 +163,8 @@ class BasicCPUAllocator : public SubAllocator { private: int numa_node_; + + TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator); }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc index 447338e7bd..bcaa37fc8a 100644 --- a/tensorflow/core/common_runtime/process_state.cc +++ b/tensorflow/core/common_runtime/process_state.cc @@ -71,20 +71,28 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) { return MemDesc(); } -VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) { +Allocator* ProcessState::GetCPUAllocator(int numa_node) { CHECK_GE(numa_node, 0); if (!numa_enabled_) numa_node = 0; mutex_lock lock(mu_); while (cpu_allocators_.size() <= static_cast(numa_node)) { + // If visitors have been defined we need an Allocator built from + // a SubAllocator. Prefer BFCAllocator, but fall back to PoolAllocator + // depending on env var setting. + const bool alloc_visitors_defined = + (!cpu_alloc_visitors_.empty() || !cpu_free_visitors_.empty()); bool use_bfc_allocator = false; - // TODO(reedwm): Switch default to BGFAllocator if it's at least as fast and - // efficient. - Status status = ReadBoolFromEnvVar("TF_CPU_ALLOCATOR_USE_BFC", false, - &use_bfc_allocator); + Status status = ReadBoolFromEnvVar( + "TF_CPU_ALLOCATOR_USE_BFC", alloc_visitors_defined, &use_bfc_allocator); if (!status.ok()) { LOG(ERROR) << "GetCPUAllocator: " << status.error_message(); } - VisitableAllocator* allocator; + Allocator* allocator = nullptr; + SubAllocator* sub_allocator = + (alloc_visitors_defined || use_bfc_allocator) + ? new BasicCPUAllocator(numa_enabled_ ? 
numa_node : -1, + cpu_alloc_visitors_, cpu_free_visitors_) + : nullptr; if (use_bfc_allocator) { // TODO(reedwm): evaluate whether 64GB by default is the best choice. int64 cpu_mem_limit_in_mb = -1; @@ -95,34 +103,63 @@ VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) { LOG(ERROR) << "GetCPUAllocator: " << status.error_message(); } int64 cpu_mem_limit = cpu_mem_limit_in_mb * (1LL << 20); - allocator = new BFCAllocator( - new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), cpu_mem_limit, - true /*allow_growth*/, "bfc_cpu_allocator_for_gpu" /*name*/); + DCHECK(sub_allocator); + allocator = + new BFCAllocator(sub_allocator, cpu_mem_limit, true /*allow_growth*/, + "bfc_cpu_allocator_for_gpu" /*name*/); VLOG(2) << "Using BFCAllocator with memory limit of " << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator"; - } else { - allocator = new PoolAllocator( - 100 /*pool_size_limit*/, true /*auto_resize*/, - new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), - new NoopRounder, "cpu_pool"); + } else if (alloc_visitors_defined) { + DCHECK(sub_allocator); + allocator = + new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/, + sub_allocator, new NoopRounder, "cpu_pool"); VLOG(2) << "Using PoolAllocator for ProcessState CPU allocator " << "numa_enabled_=" << numa_enabled_ << " numa_node=" << numa_node; + } else { + DCHECK(!sub_allocator); + allocator = cpu_allocator(); } - if (LogMemory::IsEnabled()) { + if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) { // Wrap the allocator to track allocation ids for better logging // at the cost of performance. 
- allocator = new TrackingVisitableAllocator(allocator, true); + allocator = new TrackingAllocator(allocator, true); } cpu_allocators_.push_back(allocator); + if (!sub_allocator) { + DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty()); + } } return cpu_allocators_[numa_node]; } +void ProcessState::AddCPUAllocVisitor(SubAllocator::Visitor visitor) { + VLOG(1) << "AddCPUAllocVisitor"; + mutex_lock lock(mu_); + CHECK_EQ(0, cpu_allocators_.size()) // Crash OK + << "AddCPUAllocVisitor must be called prior to first call to " + "ProcessState::GetCPUAllocator"; + cpu_alloc_visitors_.push_back(std::move(visitor)); +} + +void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) { + mutex_lock lock(mu_); + CHECK_EQ(0, cpu_allocators_.size()) // Crash OK + << "AddCPUFreeVisitor must be called prior to first call to " + "ProcessState::GetCPUAllocator"; + cpu_free_visitors_.push_back(std::move(visitor)); +} + void ProcessState::TestOnlyReset() { mutex_lock lock(mu_); + // Don't delete this value because it's static. + Allocator* default_cpu_allocator = cpu_allocator(); mem_desc_map_.clear(); - gtl::STLDeleteElements(&cpu_allocators_); + for (Allocator* a : cpu_allocators_) { + if (a != default_cpu_allocator) delete a; + } + cpu_allocators_.clear(); gtl::STLDeleteElements(&cpu_al_); } diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h index 2892677333..cac312d849 100644 --- a/tensorflow/core/common_runtime/process_state.h +++ b/tensorflow/core/common_runtime/process_state.h @@ -30,7 +30,6 @@ limitations under the License. namespace tensorflow { class Allocator; -class VisitableAllocator; class PoolAllocator; // Singleton that manages per-process state, e.g. allocation of @@ -65,7 +64,15 @@ class ProcessState { // Returns the one CPUAllocator used for the given numa_node. // TEMPORARY: ignores numa_node. 
- VisitableAllocator* GetCPUAllocator(int numa_node); + Allocator* GetCPUAllocator(int numa_node); + + // Registers alloc visitor for the CPU allocator(s). + // REQUIRES: must be called before GetCPUAllocator. + void AddCPUAllocVisitor(SubAllocator::Visitor v); + + // Registers free visitor for the CPU allocator(s). + // REQUIRES: must be called before GetCPUAllocator. + void AddCPUFreeVisitor(SubAllocator::Visitor v); typedef std::unordered_map MDMap; @@ -87,7 +94,9 @@ class ProcessState { mutex mu_; - std::vector cpu_allocators_ GUARDED_BY(mu_); + std::vector cpu_allocators_ GUARDED_BY(mu_); + std::vector cpu_alloc_visitors_ GUARDED_BY(mu_); + std::vector cpu_free_visitors_ GUARDED_BY(mu_); virtual ~ProcessState(); diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index 103eee03b3..9d59264899 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -72,9 +72,10 @@ class RenamedDevice : public Device { return underlying_->MakeGpuDevice(); } - void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, Allocator* allocator) override { - underlying_->ReinitializeGpuDevice(context, device, dc, allocator); + Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) override { + return underlying_->ReinitializeGpuDevice(context, device, dc, allocator); } Status MakeTensorFromProto(const TensorProto& tensor_proto, diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/common_runtime/visitable_allocator.h deleted file mode 100644 index ae0563a96a..0000000000 --- a/tensorflow/core/common_runtime/visitable_allocator.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ -#define TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ - -#include -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/tracking_allocator.h" - -namespace tensorflow { - -// Subclass VisitableAllocator instead of Allocator when a memory -// allocator needs to enable some kind of registration/deregistration -// of memory areas. -class VisitableAllocator : public Allocator { - public: - // Visitor gets called with a pointer to a memory area and its - // size in bytes. - typedef std::function Visitor; - - // Register a visitor guaranteed to be called exactly once on each - // chunk of memory newly allocated from the underlying device. - // Typically, chunks will be reused and possibly sub-divided by a - // pool manager, so the calls will happen only once per process - // execution, not once per tensor (re)allocation. - virtual void AddAllocVisitor(Visitor visitor) = 0; - - // Register a visitor guaranteed to be called on each chunk of - // memory returned to the underlying device. - virtual void AddFreeVisitor(Visitor visitor) = 0; -}; - -// Needed for cases when a VisitableAllocator gets wrapped for tracking. 
-// Multiple-inheritance is considered acceptable in this case because -// VisitableAllocator is a pure virtual interface and only TrackingAllocator -// has default implementation. -class TrackingVisitableAllocator : public TrackingAllocator, - public VisitableAllocator { - public: - TrackingVisitableAllocator(VisitableAllocator* allocator, bool track_ids) - : TrackingAllocator(allocator, track_ids), allocator_(allocator) {} - ~TrackingVisitableAllocator() override {} - - string Name() override { return TrackingAllocator::Name(); } - - void* AllocateRaw(size_t alignment, size_t num_bytes) override { - return TrackingAllocator::AllocateRaw(alignment, num_bytes); - } - - void DeallocateRaw(void* ptr) override { - TrackingAllocator::DeallocateRaw(ptr); - } - - void AddAllocVisitor(Visitor visitor) override { - allocator_->AddAllocVisitor(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - allocator_->AddFreeVisitor(visitor); - } - - protected: - VisitableAllocator* allocator_; -}; -} // namespace tensorflow -#endif // TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index 2a7ee16a16..84cee5569c 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -196,7 +196,7 @@ class CPUAllocatorFactory : public AllocatorFactory { class CPUSubAllocator : public SubAllocator { public: explicit CPUSubAllocator(CPUAllocator* cpu_allocator) - : cpu_allocator_(cpu_allocator) {} + : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {} void* Alloc(size_t alignment, size_t num_bytes) override { return cpu_allocator_->AllocateRaw(alignment, num_bytes); @@ -222,4 +222,22 @@ Allocator* cpu_allocator() { } return cpu_alloc; } + +SubAllocator::SubAllocator(const std::vector& alloc_visitors, + const std::vector& free_visitors) + : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {} + +void SubAllocator::VisitAlloc(void* 
ptr, int index, size_t num_bytes) { + for (const auto& v : alloc_visitors_) { + v(ptr, index, num_bytes); + } +} + +void SubAllocator::VisitFree(void* ptr, int index, size_t num_bytes) { + // Although we don't guarantee any order of visitor application, strive + // to apply free visitors in reverse order of alloc visitors. + for (int i = free_visitors_.size() - 1; i >= 0; --i) { + free_visitors_[i](ptr, index, num_bytes); + } +} } // namespace tensorflow diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index ded120b704..8c23604625 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -387,13 +388,36 @@ void EnableCPUAllocatorStats(bool enable); // full statistics. By default, it's disabled. void EnableCPUAllocatorFullStats(bool enable); -// Abstract interface of an object that does the underlying suballoc/free of -// memory for a higher-level allocator. +// An object that does the underlying suballoc/free of memory for a higher-level +// allocator. The expectation is that the higher-level allocator is doing some +// kind of cache or pool management so that it will call SubAllocator::Alloc and +// Free relatively infrequently, compared to the number of times its own +// AllocateRaw and Free methods are called. class SubAllocator { public: + // Visitor gets called with a pointer to a memory area and its + // size in bytes. The index value will be numa_node for a CPU + // allocator and GPU id for a GPU allocator. 
+ typedef std::function Visitor; + + SubAllocator(const std::vector& alloc_visitors, + const std::vector& free_visitors); + virtual ~SubAllocator() {} virtual void* Alloc(size_t alignment, size_t num_bytes) = 0; virtual void Free(void* ptr, size_t num_bytes) = 0; + + protected: + // Implementation of Alloc() method must call this on newly allocated + // value. + void VisitAlloc(void* ptr, int index, size_t num_bytes); + + // Implementation of Free() method must call this on value to be + // freed immediately before deallocation. + void VisitFree(void* ptr, int index, size_t num_bytes); + + const std::vector alloc_visitors_; + const std::vector free_visitors_; }; } // namespace tensorflow diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 794250a2c1..53ac639b4c 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -214,10 +214,12 @@ class DeviceBase { // This is overridden by GPU devices to reinitialize the derived // type returned by MakeGpuDevice. 
- virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/, - PerOpGpuDevice* /*device*/, - DeviceContext* /*dc*/, - Allocator* /*allocator*/) {} + virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/, + PerOpGpuDevice* /*device*/, + DeviceContext* /*dc*/, + Allocator* /*allocator*/) { + return Status::OK(); + } // Unimplemented by default virtual const DeviceAttributes& attributes() const; diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 80f2b12987..3e34bf0418 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -265,9 +265,12 @@ OpKernelContext::OpKernelContext(Params* params, int num_outputs) params_->ensure_eigen_gpu_device(); if (params_->eigen_gpu_device != nullptr) { Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes()); - params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device, - params_->op_device_context, - eigen_gpu_allocator); + Status s = params_->device->ReinitializeGpuDevice( + this, params_->eigen_gpu_device, params_->op_device_context, + eigen_gpu_allocator); + if (!s.ok()) { + SetStatus(s); + } } if (params_->record_tensor_accesses) { referenced_tensors_.Init(); -- GitLab From a76646d4b4ad5d56b5e63c139985bbd1eb98dd90 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 17 Sep 2018 17:50:50 -0700 Subject: [PATCH 0306/1357] Add type checking at the beginning of tpu.shard(). Otherwise a message like "TypeError: Tensor objects are only iterable when eager execution is enabled. To iterate over this tensor use tf.map_fn." will be thrown, which is confusing. 
PiperOrigin-RevId: 213371676 --- tensorflow/contrib/tpu/python/tpu/tpu.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index 815a087a24..593f1d909e 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -847,8 +847,12 @@ def shard(computation, if num_shards <= 0: raise ValueError("num_shards must be a positive integer.") + inputs = [] if inputs is None else inputs + if not isinstance(inputs, list): + raise TypeError("tpu.shard()'s inputs must be a list of Tensors or None.") + # Converts inputs to Tensors. - inputs = [] if inputs is None else [ops.convert_to_tensor(x) for x in inputs] + inputs = [ops.convert_to_tensor(x) for x in inputs] if input_shard_axes is None: input_shard_axes = [0] * len(inputs) -- GitLab From 1ede512f8c185a1cc2bd88830eeca3165283f06d Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 17 Sep 2018 17:53:41 -0700 Subject: [PATCH 0307/1357] Remove some dead code after migration from python to C. PiperOrigin-RevId: 213372027 --- tensorflow/c/eager/tape.h | 12 ++++++++++++ tensorflow/python/eager/backprop.py | 21 --------------------- tensorflow/python/eager/pywrap_tfe_src.cc | 3 +++ 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index ce038a4b57..49990b6249 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -440,6 +440,18 @@ Status InitialGradients(const VSpace& vspace, return Status::OK(); } +// TODO(agarwal): use an automatic mechanism for handling None arguments to +// gradient functions. +// +// Some gradient functions can accept None arguments for gradients. The +// following maps the operation name to the indices at which the corresponding +// gradient function can accept None values. e.g. FusedBatchNorm outputs 5 +// values and hence receives 5 gradient values during backprop. 
However the +// gradient function uses only the first of those values and ignores the rest. +// The entry, "FusedBatchNorm": [1, 2, 3, 4], indicates that only the gradient +// corresponding to index 0 is used, and the gradient values at indices 1-4 are +// ignored (and hence can be None). The backprop algorithm can then leverage +// this by not constructing zeros to pass for those indices. gtl::FlatMap>* FunctionsAcceptingNoneForIndicesMap() { static auto* const m = new gtl::FlatMap>({ {"SoftmaxCrossEntropyWithLogits", {1}}, diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index e6cf9653a8..907234b0f8 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -120,27 +120,6 @@ def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs, pywrap_tensorflow.TFE_Py_RegisterGradientFunction(_gradient_function) -_tracing = False - - -# TODO(agarwal): use an automatic mechanism for handling None arguments to -# gradient functions. -# Some gradient functions can accept None arguments for gradients. The following -# maps the operation name to the indices at which the corresponding gradient -# function can accept None values. -# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values -# during backprop. However the gradient function uses only the first of those -# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4], -# indicates that only the gradient corresponding to index 0 is used, and the -# gradient values at indices 1-4 are ignored (and hence can be None). The -# backprop algorithm can then leverage this by not constructing zeros to -# pass for those indices. 
-_grad_fn_accepts_none_for_indices = { - "SoftmaxCrossEntropyWithLogits": [1], - "FusedBatchNorm": [1, 2, 3, 4] -} - - def _record_gradient(op_name, inputs, attrs, results, name): return pywrap_tensorflow.TFE_Py_RecordGradient(op_name, inputs, attrs, results, name) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9f2f4e06ad..99b46159a9 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1744,6 +1744,9 @@ PyObject* MaybeGetDTypeForAttr(const string& attr, Py_RETURN_NONE; } +// TODO(agarwal): use an automatic mechanism for handling None arguments to +// gradient functions. + // Returns a pair where the first value of the pair indicates whether or not all // outputs are unused. If the first value is false, the second value is a // set that identifies which of the output indices are unused. -- GitLab From 71fab28dc4741dedf13fea732f6b134608719bc7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 17:55:26 -0700 Subject: [PATCH 0308/1357] Increase test timeout for image_grad_test to de-flake. PiperOrigin-RevId: 213372241 --- tensorflow/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 2dc2808152..2eeae773d3 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3090,7 +3090,7 @@ cuda_py_test( cuda_py_test( name = "image_grad_test", - size = "small", + size = "medium", srcs = ["ops/image_grad_test.py"], additional_deps = [ ":client_testlib", -- GitLab From 3b7ca4b86416f6b6153de90bc1df6e6e5b41934c Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 17 Sep 2018 18:42:45 -0700 Subject: [PATCH 0309/1357] Num elements fastpath for eager tensors. 
PiperOrigin-RevId: 213377426 --- tensorflow/c/eager/c_api.cc | 11 +++++ tensorflow/c/eager/c_api.h | 2 + .../common_runtime/eager/tensor_handle.cc | 16 +++++++- .../core/common_runtime/eager/tensor_handle.h | 1 + tensorflow/python/eager/pywrap_tensor.cc | 41 +++++++++++-------- tensorflow/python/eager/pywrap_tensor.h | 5 ++- tensorflow/python/eager/pywrap_tfe_src.cc | 9 ++-- 7 files changed, 61 insertions(+), 24 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 6f86ea80e5..0bf3d9542b 100755 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -375,6 +375,17 @@ int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) { return result; } +int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, TF_Status* status) { + if (h == nullptr || h->handle == nullptr) { + status->status = tensorflow::errors::InvalidArgument( + "The passed in handle is a nullptr"); + return -1; + } + tensorflow::int64 result; + status->status = h->handle->NumElements(&result); + return result; +} + int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, TF_Status* status) { if (h == nullptr || h->handle == nullptr) { diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index a87d73ec8e..6323f8a053 100755 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -163,6 +163,8 @@ TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); // This function will block till the operation that produces `h` has completed. TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status); +TF_CAPI_EXPORT extern int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, + TF_Status* status); // This function will block till the operation that produces `h` has completed. 
TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index b912f7d37b..d58724cbfa 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -125,7 +125,6 @@ Status TensorHandle::Shape(tensorflow::TensorShape* shape) { Status TensorHandle::NumDims(int* num_dims) { if (IsRemote()) { TF_RETURN_IF_ERROR(WaitForNode(remote_shape_node_id_, false)); - CHECK(remote_shape_ != nullptr); *num_dims = remote_shape_->dims(); } else { TF_RETURN_IF_ERROR(WaitReady()); @@ -153,6 +152,21 @@ Status TensorHandle::Dim(int dim_index, int64* dim) { return Status::OK(); } +Status TensorHandle::NumElements(int64* num_elements) { + if (IsRemote()) { + TF_RETURN_IF_ERROR(WaitForNode(remote_shape_node_id_, false)); + *num_elements = remote_shape_->num_elements(); + } else { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + DCHECK(num_elements != nullptr); + + *num_elements = tensor_.NumElements(); + } + + return Status::OK(); +} + Status TensorHandle::RemoteAddress(int64* op_id, int32* output_num) { if (!IsRemote()) { return errors::FailedPrecondition( diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index 1bc9c6531a..e55f1a0338 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -113,6 +113,7 @@ class TensorHandle : public core::RefCounted { Status NumDims(int* num_dims); Status Dim(int dim_index, int64* dim); + Status NumElements(int64* num_elements); // Return the op_id and output num if the handle refers to a remote tensor. 
Status RemoteAddress(int64* op_id, int32* output_num); diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index f34ce6af79..5f44bd4fec 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -516,25 +516,13 @@ static PyObject* EagerTensor_rank(EagerTensor* self) { // Getter for `_num_elements`. static PyObject* EagerTensor_num_elements(EagerTensor* self) { auto handle = self->handle; - int n = TFE_TensorHandleNumDims(handle, self->status); + int n = TFE_TensorHandleNumElements(handle, self->status); if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) { // Cleanup self->status before returning. TF_SetStatus(self->status, TF_OK, ""); return nullptr; } - tensorflow::int64 value = 1; - if (PyErr_Occurred()) return nullptr; - for (int i = 0; i < n; ++i) { - int64_t dim = TFE_TensorHandleDim(handle, i, self->status); - if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) { - // Cleanup self->status before returning. 
- TF_SetStatus(self->status, TF_OK, ""); - PyErr_SetString(PyExc_RuntimeError, "Error while iterating dimensions"); - return nullptr; - } - value *= dim; - } - return PyLong_FromLongLong(value); + return PyLong_FromLongLong(n); } static PyObject* EagerTensor_tensor_handle(EagerTensor* self, void* unused) { @@ -777,17 +765,34 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { return reinterpret_cast(t); } -tensorflow::int64 EagerTensor_id(const PyObject* tensor) { - CHECK(EagerTensor_CheckExact(tensor)); +tensorflow::int64 PyEagerTensor_ID(const PyObject* tensor) { + DCHECK(EagerTensor_CheckExact(tensor)); return reinterpret_cast(tensor)->id; } -tensorflow::DataType EagerTensor_dtype(const PyObject* tensor) { - CHECK(EagerTensor_CheckExact(tensor)); +tensorflow::DataType PyEagerTensor_Dtype(const PyObject* tensor) { + DCHECK(EagerTensor_CheckExact(tensor)); return static_cast(TFE_TensorHandleDataType( reinterpret_cast(tensor)->handle)); } +tensorflow::int64 PyEagerTensor_NumElements(const PyObject* tensor) { + DCHECK(EagerTensor_CheckExact(tensor)); + const EagerTensor* as_c_eager_tensor = + reinterpret_cast(tensor); + tensorflow::int64 result = TFE_TensorHandleNumElements( + as_c_eager_tensor->handle, as_c_eager_tensor->status); + + if (MaybeRaiseExceptionFromTFStatus(as_c_eager_tensor->status, + PyExc_ValueError)) { + // Cleanup status before returning. + TF_SetStatus(as_c_eager_tensor->status, TF_OK, ""); + return -1; + } + + return result; +} + PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) { if (!PyType_Check(base_class)) { PyErr_SetString( diff --git a/tensorflow/python/eager/pywrap_tensor.h b/tensorflow/python/eager/pywrap_tensor.h index bc042eb19e..4eaa1ba536 100644 --- a/tensorflow/python/eager/pywrap_tensor.h +++ b/tensorflow/python/eager/pywrap_tensor.h @@ -21,8 +21,9 @@ limitations under the License. 
#include "tensorflow/python/lib/core/numpy.h" bool EagerTensor_CheckExact(const PyObject* o); -tensorflow::int64 EagerTensor_id(const PyObject* tensor); -tensorflow::DataType EagerTensor_dtype(const PyObject* tensor); +tensorflow::int64 PyEagerTensor_ID(const PyObject* tensor); +tensorflow::DataType PyEagerTensor_Dtype(const PyObject* tensor); +tensorflow::int64 PyEagerTensor_NumElements(const PyObject* tensor); namespace tensorflow { TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype); diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 99b46159a9..a0f6be459e 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -860,7 +860,7 @@ static tensorflow::int64 MakeInt(PyObject* integer) { static tensorflow::int64 FastTensorId(PyObject* tensor) { if (EagerTensor_CheckExact(tensor)) { - return EagerTensor_id(tensor); + return PyEagerTensor_ID(tensor); } PyObject* id_field = PyObject_GetAttrString(tensor, "_id"); if (id_field == nullptr) { @@ -873,7 +873,7 @@ static tensorflow::int64 FastTensorId(PyObject* tensor) { static tensorflow::DataType FastTensorDtype(PyObject* tensor) { if (EagerTensor_CheckExact(tensor)) { - return EagerTensor_dtype(tensor); + return PyEagerTensor_Dtype(tensor); } PyObject* dtype_field = PyObject_GetAttrString(tensor, "dtype"); if (dtype_field == nullptr) { @@ -1178,7 +1178,7 @@ void TFE_Py_TapeWatch(PyObject* tape, PyObject* tensor) { static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { if (EagerTensor_CheckExact(tensor)) { TFE_TensorHandle* t = EagerTensor_Handle(tensor); - tensorflow::int64 id = EagerTensor_id(tensor); + tensorflow::int64 id = PyEagerTensor_ID(tensor); tensorflow::TensorShape tensor_shape; const tensorflow::Status status = t->handle->Shape(&tensor_shape); @@ -1400,6 +1400,9 @@ class PyVSpace } tensorflow::int64 NumElements(PyObject* tensor) const final { + if 
(EagerTensor_CheckExact(tensor)) { + return PyEagerTensor_NumElements(tensor); + } PyObject* arglist = Py_BuildValue("(O)", reinterpret_cast(tensor)); PyObject* result = PyEval_CallObject(num_elements_, arglist); -- GitLab From f2a577888be8368121fe7ce16d4b72f91f53be60 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 18:45:55 -0700 Subject: [PATCH 0310/1357] Break cwise_opt_test.py into 3 files to speed up testing, since we are up against the 50 shard limit. PiperOrigin-RevId: 213377776 --- tensorflow/python/kernel_tests/BUILD | 40 + .../kernel_tests/cwise_ops_binary_test.py | 878 +++++++++++++ .../python/kernel_tests/cwise_ops_test.py | 1156 +---------------- .../kernel_tests/cwise_ops_unary_test.py | 541 ++++++++ 4 files changed, 1464 insertions(+), 1151 deletions(-) create mode 100644 tensorflow/python/kernel_tests/cwise_ops_binary_test.py create mode 100644 tensorflow/python/kernel_tests/cwise_ops_unary_test.py diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 6bba99b9e7..100240a626 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2798,6 +2798,46 @@ cuda_py_test( shard_count = 50, ) +cuda_py_test( + name = "cwise_ops_binary_test", + size = "medium", + srcs = ["cwise_ops_binary_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:math_ops_gen", + "//tensorflow/python:nn_grad", + "//tensorflow/python:platform", + "//tensorflow/python:variables", + ], + shard_count = 50, +) + +cuda_py_test( + name = "cwise_ops_unary_test", + size = "medium", + srcs = ["cwise_ops_unary_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + 
"//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:math_ops_gen", + "//tensorflow/python:nn_grad", + "//tensorflow/python:platform", + "//tensorflow/python:variables", + ], + shard_count = 50, +) + cuda_py_test( name = "embedding_ops_test", size = "medium", diff --git a/tensorflow/python/kernel_tests/cwise_ops_binary_test.py b/tensorflow/python/kernel_tests/cwise_ops_binary_test.py new file mode 100644 index 0000000000..8028f93a8c --- /dev/null +++ b/tensorflow/python/kernel_tests/cwise_ops_binary_test.py @@ -0,0 +1,878 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Functional tests for binary coefficient-wise operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util +from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_grad # pylint: disable=unused-import +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + +_ADD = lambda x, y: x + y +_SUB = lambda x, y: x - y +_MUL = lambda x, y: x * y +_POW = lambda x, y: x**y +_TRUEDIV = lambda x, y: x / y +_FLOORDIV = lambda x, y: x // y +_MOD = lambda x, y: x % y + + +# TODO(zongheng): it'd be great to factor out this function and various random +# SparseTensor gen funcs. +def _sparsify(x, thresh=0.5, index_dtype=np.int64): + x[x < thresh] = 0 + + non_zero = np.where(x) + x_indices = np.vstack(non_zero).astype(index_dtype).T + x_values = x[non_zero] + x_shape = x.shape + + return sparse_tensor.SparseTensor( + indices=x_indices, values=x_values, dense_shape=x_shape), x_values + + +def _default_tolerance(dtype): + """Returns a sensible default tolerance for comparing results of a given type. + + Args: + dtype: A datatype. 
+ """ + if dtype == np.float16: + return 5e-3 + elif dtype in (np.float32, np.complex64): + return 1e-3 + elif dtype in (np.float64, np.complex128): + return 1e-5 + else: + return None # Fail fast for unexpected types + + +class BinaryOpTest(test.TestCase): + + def _compareCpu(self, x, y, np_func, tf_func, also_compare_variables=False): + np_ans = np_func(x, y) + with self.test_session(use_gpu=False): + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + out = tf_func(inx, iny) + tf_cpu = out.eval() + # Test that the op takes precedence over numpy operators. + np_left = tf_func(x, iny).eval() + np_right = tf_func(inx, y).eval() + + if also_compare_variables: + var_x = variables.Variable(x) + var_y = variables.Variable(y) + variables.global_variables_initializer().run() + print(type(x), type(y), type(var_x), type(var_y)) + print(type(tf_func(x, var_y)), type(tf_func(var_x, y))) + np_var_left = tf_func(x, var_y).eval() + np_var_right = tf_func(var_x, y).eval() + + if np_ans.dtype != np.object: + self.assertAllClose(np_ans, tf_cpu) + self.assertAllClose(np_ans, np_left) + self.assertAllClose(np_ans, np_right) + if also_compare_variables: + self.assertAllClose(np_ans, np_var_left) + self.assertAllClose(np_ans, np_var_right) + self.assertShapeEqual(np_ans, out) + + _GRAD_TOL = { + dtypes_lib.float16: 1e-3, + dtypes_lib.float32: 1e-3, + dtypes_lib.complex64: 1e-2, + dtypes_lib.float64: 1e-5, + dtypes_lib.complex128: 1e-4 + } + + def _compareGradientX(self, + x, + y, + np_func, + tf_func, + numeric_gradient_type=None): + z = np_func(x, y) + zs = list(z.shape) + with self.cached_session(): + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + if x.dtype in (np.float32, np.float64): + out = 1.1 * tf_func(inx, iny) + else: + out = tf_func(inx, iny) + xs = list(x.shape) + jacob_t, jacob_n = gradient_checker.compute_gradient( + inx, xs, out, zs, x_init_value=x) + if numeric_gradient_type is not None: + xf = x.astype(numeric_gradient_type) + yf = 
y.astype(numeric_gradient_type) + inxf = ops.convert_to_tensor(xf) + inyf = ops.convert_to_tensor(yf) + outf = tf_func(inxf, inyf) + _, jacob_n = gradient_checker.compute_gradient( + inxf, xs, outf, zs, x_init_value=xf, delta=1e-3) + jacob_n = jacob_n.astype(x.dtype) + tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)] + self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) + + def _compareGradientY(self, + x, + y, + np_func, + tf_func, + numeric_gradient_type=None): + z = np_func(x, y) + zs = list(z.shape) + with self.cached_session(): + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + if x.dtype in (np.float32, np.float64): + out = 1.1 * tf_func(inx, iny) + else: + out = tf_func(inx, iny) + ys = list(np.shape(y)) + jacob_t, jacob_n = gradient_checker.compute_gradient( + iny, ys, out, zs, x_init_value=y) + if numeric_gradient_type is not None: + xf = x.astype(numeric_gradient_type) + yf = y.astype(numeric_gradient_type) + inxf = ops.convert_to_tensor(xf) + inyf = ops.convert_to_tensor(yf) + outf = tf_func(inxf, inyf) + _, jacob_n = gradient_checker.compute_gradient( + inyf, ys, outf, zs, x_init_value=yf) + jacob_n = jacob_n.astype(x.dtype) + tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)] + self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) + + def _compareGpu(self, x, y, np_func, tf_func): + np_ans = np_func(x, y) + with self.test_session(force_gpu=test_util.is_gpu_available()): + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + out = tf_func(inx, iny) + tf_gpu = out.eval() + self.assertAllClose(np_ans, tf_gpu) + self.assertShapeEqual(np_ans, out) + # TODO(zhifengc/ke): make gradient checker work on GPU. 
+ + def _compareBoth(self, x, y, np_func, tf_func, also_compare_variables=False): + self._compareCpu(x, y, np_func, tf_func, also_compare_variables) + if x.dtype in (np.float16, np.float32, np.float64, np.complex64, + np.complex128): + if tf_func not in (_FLOORDIV, math_ops.floordiv, math_ops.zeta, + math_ops.polygamma): + self._compareGradientX(x, y, np_func, tf_func) + self._compareGradientY(x, y, np_func, tf_func) + if tf_func in (math_ops.zeta, math_ops.polygamma): + # These methods only support gradients in the second parameter + self._compareGradientY(x, y, np_func, tf_func) + self._compareGpu(x, y, np_func, tf_func) + + def testFloatBasic(self): + x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32) + y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32) + self._compareBoth(x, y, np.add, math_ops.add, also_compare_variables=True) + self._compareBoth(x, y, np.subtract, math_ops.subtract) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) + self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv) + self._compareBoth(x, y, np.add, _ADD) + self._compareBoth(x, y, np.subtract, _SUB) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) + self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV) + self._compareBoth(x, y, np.arctan2, math_ops.atan2) + x1 = np.random.randn(5, 6).astype(np.float32) + x2 = np.random.randn(5, 6).astype(np.float32) + # Remove tiny values--atan2 gradients are flaky near the origin. 
+ x1[np.abs(x1) < 0.05] = 0.05 * np.sign(x1[np.abs(x1) < 0.05]) + x2[np.abs(x2) < 0.05] = 0.05 * np.sign(x2[np.abs(x2) < 0.05]) + self._compareBoth(x1, x2, np.arctan2, math_ops.atan2) + try: + from scipy import special # pylint: disable=g-import-not-at-top + a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32) + x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32) + self._compareBoth(a_pos_small, x_pos_small, special.gammainc, + math_ops.igamma) + self._compareBoth(a_pos_small, x_pos_small, special.gammaincc, + math_ops.igammac) + # Need x > 1 + self._compareBoth(x_pos_small + 1, a_pos_small, special.zeta, + math_ops.zeta) + n_small = np.arange(0, 15).reshape(1, 3, 5).astype(np.float32) + self._compareBoth(n_small, x_pos_small, special.polygamma, + math_ops.polygamma) + except ImportError as e: + tf_logging.warn("Cannot test special functions: %s" % str(e)) + + def testFloatDifferentShapes(self): + x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.float32) + y = np.array([1, 2]).reshape(2, 1).astype(np.float32) + with self.cached_session() as sess: + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + s = math_ops.reduce_sum(inx * iny) + gx, gy = sess.run(gradients_impl.gradients(s, [inx, iny])) + # gx is simply the broadcasted y + self.assertAllEqual(gx, + np.array([1, 1, 2, 2]).reshape(2, 2).astype(np.float32)) + # gy is x's column summed up + self.assertAllEqual(gy, np.array([3, 7]).reshape(2, 1).astype(np.float32)) + + def testFloatVariableOverload(self): + x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.int32) + y = np.array([1, 2]).reshape(2, 1).astype(np.int32) + var_x = variables.Variable(x) + var_y = variables.Variable(y) + with self.cached_session() as sess: + sess.run([var_x.initializer, var_y.initializer]) + left_result = (var_x * y).eval() + right_result = (x * var_y).eval() + np_result = x * y + self.assertAllEqual(np_result, left_result) + self.assertAllEqual(np_result, right_result) + + 
def testDoubleBasic(self): + x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float64) + y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float64) + self._compareBoth(x, y, np.add, math_ops.add) + self._compareBoth(x, y, np.subtract, math_ops.subtract) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) + self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv) + self._compareBoth(x, y, np.add, _ADD) + self._compareBoth(x, y, np.subtract, _SUB) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) + self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV) + self._compareBoth(x, y, np.arctan2, math_ops.atan2) + x1 = np.random.randn(7, 4).astype(np.float64) + x2 = np.random.randn(7, 4).astype(np.float64) + # Remove tiny values--atan2 gradients are flaky near the origin. + x1[np.abs(x1) < 0.5] = 0.5 * np.sign(x1[np.abs(x1) < 0.5]) + x2[np.abs(x2) < 0.5] = 0.5 * np.sign(x2[np.abs(x2) < 0.5]) + self._compareBoth(x1, x2, np.arctan2, math_ops.atan2) + try: + from scipy import special # pylint: disable=g-import-not-at-top + a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32) + x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32) + self._compareBoth(a_pos_small, x_pos_small, special.gammainc, + math_ops.igamma) + self._compareBoth(a_pos_small, x_pos_small, special.gammaincc, + math_ops.igammac) + except ImportError as e: + tf_logging.warn("Cannot test special functions: %s" % str(e)) + + def testUint8Basic(self): + x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint8) + y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint8) + self._compareBoth(x, y, np.add, math_ops.add) + + def testInt8Basic(self): + x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int8) + y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int8) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + 
self._compareBoth(x, y, np.multiply, _MUL) + + def testInt16Basic(self): + x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int16) + y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int16) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y, np.multiply, _MUL) + + def testUint16Basic(self): + x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint16) + y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint16) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y, np.true_divide, math_ops.truediv) + self._compareBoth(x, y, np.floor_divide, math_ops.floordiv) + self._compareBoth(x, y, np.true_divide, _TRUEDIV) + self._compareBoth(x, y, np.floor_divide, _FLOORDIV) + + def testInt32Basic(self): + x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int32) + y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int32) + self._compareBoth(x, y, np.add, math_ops.add) + self._compareBoth(x, y, np.subtract, math_ops.subtract) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y, np.true_divide, math_ops.truediv) + self._compareBoth(x, y, np.floor_divide, math_ops.floordiv) + self._compareBoth(x, y, np.mod, math_ops.mod) + self._compareBoth(x, y, np.add, _ADD) + self._compareBoth(x, y, np.subtract, _SUB) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y, np.true_divide, _TRUEDIV) + self._compareBoth(x, y, np.floor_divide, _FLOORDIV) + self._compareBoth(x, y, np.mod, _MOD) + # _compareBoth tests on GPU only for floating point types, so test + # _MOD for int32 on GPU by calling _compareGpu + self._compareGpu(x, y, np.mod, _MOD) + + def testInt64Basic(self): + x = np.arange(1 << 40, 13 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64) + y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int64) + self._compareBoth(x, y, np.subtract, math_ops.subtract) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, 
y, np.true_divide, math_ops.truediv) + self._compareBoth(x, y, np.floor_divide, math_ops.floordiv) + self._compareBoth(x, y, np.mod, math_ops.mod) + self._compareBoth(x, y, np.subtract, _SUB) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y, np.true_divide, _TRUEDIV) + self._compareBoth(x, y, np.floor_divide, _FLOORDIV) + self._compareBoth(x, y, np.mod, _MOD) + + def testComplex64Basic(self): + x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype( + np.complex64) + y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype( + np.complex64) + self._compareBoth(x, y, np.add, math_ops.add) + self._compareBoth(x, y, np.subtract, math_ops.subtract) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) + self._compareBoth(x, y, np.add, _ADD) + self._compareBoth(x, y, np.subtract, _SUB) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) + + def testComplex128Basic(self): + x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype( + np.complex128) + y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype( + np.complex128) + self._compareBoth(x, y, np.add, math_ops.add) + self._compareBoth(x, y, np.subtract, math_ops.subtract) + self._compareBoth(x, y, np.multiply, math_ops.multiply) + self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) + self._compareBoth(x, y, np.add, _ADD) + self._compareBoth(x, y, np.subtract, _SUB) + self._compareBoth(x, y, np.multiply, _MUL) + self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) + + def testStringComparison(self): + x = np.array([["abc", "bh"], ["c", ""]]) + y = np.array([["abc", "bh"], ["def", "hi"]]) + with self.test_session(use_gpu=False) as sess: + cmp_eq = math_ops.equal(x, y) + cmp_not_eq = math_ops.not_equal(x, y) + values = sess.run([cmp_eq, cmp_not_eq]) + self.assertAllEqual([[True, True], [False, False]], 
values[0]) + self.assertAllEqual([[False, False], [True, True]], values[1]) + + def testString(self): + x = np.array([["x_0_0", "x_0_1", "x_0_2"], ["x_1_0", "x_1_1", "x_1_2"], + ["x_2_0", "x_2_1", "x_2_2"]], + dtype=np.object) + y = np.array([["y_0_0", "y_0_1", "y_0_2"], ["y_1_0", "y_1_1", "y_1_2"], + ["y_2_0", "y_2_1", "y_2_2"]], + dtype=np.object) + z = np.array([["z_0", "z_1", "z_2"]], dtype=np.object) + w = np.array("w", dtype=np.object) + self._compareCpu(x, y, _ADD, _ADD) + self._compareCpu(x, z, _ADD, _ADD) + self._compareCpu(x, w, _ADD, _ADD) + self._compareCpu(z, w, _ADD, _ADD) + + def _compareBCast(self, xs, ys, dtype, np_func, tf_func): + if dtype in (np.complex64, np.complex128): + x = (1 + np.linspace(0, 2 + 3j, np.prod(xs))).astype(dtype).reshape(xs) + y = (1 + np.linspace(0, 2 - 2j, np.prod(ys))).astype(dtype).reshape(ys) + else: + x = (1 + np.linspace(0, 5, np.prod(xs))).astype(dtype).reshape(xs) + y = (1 + np.linspace(0, 5, np.prod(ys))).astype(dtype).reshape(ys) + self._compareCpu(x, y, np_func, tf_func) + if x.dtype in (np.float16, np.float32, np.float64): + # TODO(aselle): Make the test work for dtypes: + # (np.complex64, np.complex128). + if tf_func not in (_FLOORDIV, math_ops.floordiv): + if x.dtype == np.float16: + # Compare fp16 theoretical gradients to fp32 numerical gradients, + # since fp16 numerical gradients are too imprecise unless great + # care is taken with choosing the inputs and the delta. This is + # a weaker check (in particular, it does not test the op itself, + # only its gradient), but it's much better than nothing. + self._compareGradientX(x, y, np_func, tf_func, np.float) + self._compareGradientY(x, y, np_func, tf_func, np.float) + else: + self._compareGradientX(x, y, np_func, tf_func) + self._compareGradientY(x, y, np_func, tf_func) + self._compareGpu(x, y, np_func, tf_func) + + # TODO(josh11b,vrv): Refactor this to use parameterized tests. 
+ def _testBCastByFunc(self, funcs, xs, ys): + dtypes = [ + np.float16, + np.float32, + np.float64, + np.int32, + np.int64, + np.complex64, + np.complex128, + ] + for dtype in dtypes: + for (np_func, tf_func) in funcs: + if (dtype in (np.complex64, np.complex128) and + tf_func in (_FLOORDIV, math_ops.floordiv)): + continue # floordiv makes no sense for complex numbers + self._compareBCast(xs, ys, dtype, np_func, tf_func) + self._compareBCast(ys, xs, dtype, np_func, tf_func) + + def _testBCastA(self, xs, ys): + funcs = [ + (np.add, math_ops.add), + (np.add, _ADD), + ] + self._testBCastByFunc(funcs, xs, ys) + + def _testBCastB(self, xs, ys): + funcs = [ + (np.subtract, math_ops.subtract), + (np.subtract, _SUB), + (np.power, math_ops.pow), + ] + self._testBCastByFunc(funcs, xs, ys) + + def _testBCastC(self, xs, ys): + funcs = [ + (np.multiply, math_ops.multiply), + (np.multiply, _MUL), + ] + self._testBCastByFunc(funcs, xs, ys) + + def _testBCastD(self, xs, ys): + funcs = [ + (np.true_divide, math_ops.truediv), + (np.floor_divide, math_ops.floordiv), + (np.true_divide, _TRUEDIV), + (np.floor_divide, _FLOORDIV), + ] + self._testBCastByFunc(funcs, xs, ys) + + def testBCast_0A(self): + self._testBCastA([1, 3, 2], [1]) + + def testBCast_0B(self): + self._testBCastB([1, 3, 2], [1]) + + def testBCast_0C(self): + self._testBCastC([1, 3, 2], [1]) + + def testBCast_0D(self): + self._testBCastD([1, 3, 2], [1]) + + def testBCast_1A(self): + self._testBCastA([1, 3, 2], [2]) + + def testBCast_1B(self): + self._testBCastB([1, 3, 2], [2]) + + def testBCast_1C(self): + self._testBCastC([1, 3, 2], [2]) + + def testBCast_1D(self): + self._testBCastD([1, 3, 2], [2]) + + def testBCast_2A(self): + self._testBCastA([1, 3, 2], [3, 2]) + + def testBCast_2B(self): + self._testBCastB([1, 3, 2], [3, 2]) + + def testBCast_2C(self): + self._testBCastC([1, 3, 2], [3, 2]) + + def testBCast_2D(self): + self._testBCastD([1, 3, 2], [3, 2]) + + def testBCast_3A(self): + self._testBCastA([1, 3, 2], [3, 
1]) + + def testBCast_3B(self): + self._testBCastB([1, 3, 2], [3, 1]) + + def testBCast_3C(self): + self._testBCastC([1, 3, 2], [3, 1]) + + def testBCast_3D(self): + self._testBCastD([1, 3, 2], [3, 1]) + + def testBCast_4A(self): + self._testBCastA([1, 3, 2], [1, 3, 2]) + + def testBCast_4B(self): + self._testBCastB([1, 3, 2], [1, 3, 2]) + + def testBCast_4C(self): + self._testBCastC([1, 3, 2], [1, 3, 2]) + + def testBCast_4D(self): + self._testBCastD([1, 3, 2], [1, 3, 2]) + + def testBCast_5A(self): + self._testBCastA([1, 3, 2], [2, 3, 1]) + + def testBCast_5B(self): + self._testBCastB([1, 3, 2], [2, 3, 1]) + + def testBCast_5C(self): + self._testBCastC([1, 3, 2], [2, 3, 1]) + + def testBCast_5D(self): + self._testBCastD([1, 3, 2], [2, 3, 1]) + + def testBCast_6A(self): + self._testBCastA([1, 3, 2], [2, 1, 1]) + + def testBCast_6B(self): + self._testBCastB([1, 3, 2], [2, 1, 1]) + + def testBCast_6C(self): + self._testBCastC([1, 3, 2], [2, 1, 1]) + + def testBCast_6D(self): + self._testBCastD([1, 3, 2], [2, 1, 1]) + + def testBCast_7A(self): + self._testBCastA([1, 3, 2], [1, 3, 1]) + + def testBCast_7B(self): + self._testBCastB([1, 3, 2], [1, 3, 1]) + + def testBCast_7C(self): + self._testBCastC([1, 3, 2], [1, 3, 1]) + + def testBCast_7D(self): + self._testBCastD([1, 3, 2], [1, 3, 1]) + + def testBCast_8A(self): + self._testBCastA([2, 1, 5], [2, 3, 1]) + + def testBCast_8B(self): + self._testBCastB([2, 1, 5], [2, 3, 1]) + + def testBCast_8C(self): + self._testBCastC([2, 1, 5], [2, 3, 1]) + + def testBCast_8D(self): + self._testBCastD([2, 1, 5], [2, 3, 1]) + + def testBCast_9A(self): + self._testBCastA([2, 0, 5], [2, 0, 1]) + + def testBCast_9B(self): + self._testBCastB([2, 0, 5], [2, 0, 1]) + + def testBCast_9C(self): + self._testBCastC([2, 0, 5], [2, 0, 1]) + + def testBCast_9D(self): + self._testBCastD([2, 0, 5], [2, 0, 1]) + + def testBCast_10A(self): + self._testBCastA([2, 3, 0], [2, 3, 1]) + + def testBCast_10B(self): + self._testBCastB([2, 3, 0], [2, 3, 1]) + 
+ def testBCast_10C(self): + self._testBCastC([2, 3, 0], [2, 3, 1]) + + def testBCast_10D(self): + self._testBCastD([2, 3, 0], [2, 3, 1]) + + def testBCast_11A(self): + self._testBCastA([1, 3, 2], [1, 3, 2]) + + def testBCast_11B(self): + self._testBCastB([1, 3, 2], [1, 3, 2]) + + def testBCast_11C(self): + self._testBCastC([1, 3, 2], [1, 3, 2]) + + def testBCast_11D(self): + self._testBCastD([1, 3, 2], [1, 3, 2]) + + def testBCast_12A(self): + self._testBCastA([1, 1, 1, 1, 3, 2], [1, 3, 2]) + + def testBCast_12B(self): + self._testBCastB([1, 1, 1, 1, 3, 2], [1, 3, 2]) + + def testBCast_12C(self): + self._testBCastC([1, 1, 1, 1, 3, 2], [1, 3, 2]) + + def testBCast_12D(self): + self._testBCastD([1, 1, 1, 1, 3, 2], [1, 3, 2]) + + def testBCast_13A(self): + self._testBCastA([1, 3, 2, 1, 1], [1]) + + def testBCast_13B(self): + self._testBCastB([1, 3, 2, 1, 1], [1]) + + def testBCast_13C(self): + self._testBCastC([1, 3, 2, 1, 1], [1]) + + def testBCast_13D(self): + self._testBCastD([1, 3, 2, 1, 1], [1]) + + def testBCast_14A(self): + self._testBCastA([2, 3, 1, 1, 5], [1]) + + def testBCast_14B(self): + self._testBCastB([2, 3, 1, 1, 5], [1]) + + def testBCast_14C(self): + self._testBCastC([2, 3, 1, 1, 5], [1]) + + def testBCast_14D(self): + self._testBCastD([2, 3, 1, 1, 5], [1]) + + def testBCast_15A(self): + self._testBCastA([10, 3, 1, 2], [3, 1, 2]) + + def testBCast_15B(self): + self._testBCastB([10, 3, 1, 2], [3, 1, 2]) + + def testBCast_15C(self): + self._testBCastC([10, 3, 1, 2], [3, 1, 2]) + + def testBCast_15D(self): + self._testBCastD([10, 3, 1, 2], [3, 1, 2]) + + def testMismatchedDimensions(self): + for func in [ + math_ops.add, math_ops.subtract, math_ops.multiply, math_ops.div, _ADD, + _SUB, _MUL, _TRUEDIV, _FLOORDIV + ]: + with self.assertRaisesWithPredicateMatch( + ValueError, lambda e: "Dimensions must" in str(e)): + func( + ops.convert_to_tensor([10.0, 20.0, 30.0]), + ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]])) + + def testZeroPowGrad(self): + 
with self.cached_session(): + for dtype in (np.float16, np.float32, np.float64, np.complex64, + np.complex128): + x = constant_op.constant(0.0, dtype=dtype) + y = constant_op.constant(2.0, dtype=dtype) + z = math_ops.pow(x, y) + error = gradient_checker.compute_gradient_error(y, [], z, []) + self.assertEqual(error, 0) + + def testComplexPowGrad(self): + with self.cached_session(): + for dtype in np.complex64, np.complex128: + for base in 2.0, -2.0: + x = constant_op.constant(base, dtype=dtype) + y = constant_op.constant(2.0, dtype=dtype) + z = math_ops.pow(x, y) + error = gradient_checker.compute_gradient_error(y, [], z, []) + self.assertLess(error, 2e-4) + + def testAtan2SpecialValues(self): + x1l, x2l = zip((+0.0, +0.0), (+0.0, -0.0), (-0.0, +0.0), (-0.0, -0.0), + (1.2345, float("inf")), (1.2345, -float("inf")), + (-4.321, float("inf")), (-4.125, -float("inf")), + (float("inf"), float("inf")), (float("inf"), -float("inf")), + (-float("inf"), float("inf")), + (-float("inf"), -float("inf"))) + for dtype in np.float32, np.float64: + x1 = np.array(x1l).astype(dtype) + x2 = np.array(x2l).astype(dtype) + self._compareCpu(x1, x2, np.arctan2, math_ops.atan2) + self._compareGpu(x1, x2, np.arctan2, math_ops.atan2) + + def testPowNegativeExponent(self): + for dtype in [np.int32, np.int64]: + with self.test_session(use_gpu=False) as sess: + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Integers to negative integer powers are not allowed"): + x = np.array([5, 2]).astype(dtype) + y = np.array([-2, 3]).astype(dtype) + sess.run(math_ops.pow(x, y)) + + with self.test_session(use_gpu=False) as sess: + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Integers to negative integer powers are not allowed"): + x = np.array([5, 2]).astype(dtype) + y = np.array([2, -3]).astype(dtype) + sess.run(math_ops.pow(x, y)) + + with self.test_session(use_gpu=False) as sess: + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Integers to 
negative integer powers are not allowed"): + x = np.array([5, 2]).astype(dtype) + y = -3 + sess.run(math_ops.pow(x, y)) + + +class ComparisonOpTest(test.TestCase): + + def _compareScalar(self, func, x, y, dtype): + with self.test_session(force_gpu=test_util.is_gpu_available()): + out = func( + ops.convert_to_tensor(np.array([x]).astype(dtype)), + ops.convert_to_tensor(np.array([y]).astype(dtype))) + ret = out.eval() + return ret[0] + + def testScalarCompareScalar(self): + dtypes = [np.float16, np.float32, np.float64, np.int32, np.int64] + data = [-1, 0, 1] + for t in dtypes: + for x in data: + for y in data: + self.assertEqual(self._compareScalar(math_ops.less, x, y, t), x < y) + self.assertEqual( + self._compareScalar(math_ops.less_equal, x, y, t), x <= y) + self.assertEqual( + self._compareScalar(math_ops.greater, x, y, t), x > y) + self.assertEqual( + self._compareScalar(math_ops.greater_equal, x, y, t), x >= y) + self.assertEqual(self._compareScalar(math_ops.equal, x, y, t), x == y) + self.assertEqual( + self._compareScalar(math_ops.not_equal, x, y, t), x != y) + data = [-1, 0, 1, -1j, 1j, 1 + 1j, 1 - 1j] + for t in [np.complex64, np.complex128]: + for x in data: + for y in data: + self.assertEqual(self._compareScalar(math_ops.equal, x, y, t), x == y) + self.assertEqual( + self._compareScalar(math_ops.not_equal, x, y, t), x != y) + + def _compare(self, x, y, np_func, tf_func): + np_ans = np_func(x, y) + with self.test_session(force_gpu=test_util.is_gpu_available()): + out = tf_func(ops.convert_to_tensor(x), ops.convert_to_tensor(y)) + tf_ans = out.eval() + self.assertAllEqual(np_ans, tf_ans) + + def testTensorCompareTensor(self): + x = np.linspace(-15, 15, 6).reshape(1, 3, 2) + y = np.linspace(20, -10, 6).reshape(1, 3, 2) + for t in [np.float16, np.float32, np.float64, np.int32, np.int64]: + xt = x.astype(t) + yt = y.astype(t) + self._compare(xt, yt, np.less, math_ops.less) + self._compare(xt, yt, np.less_equal, math_ops.less_equal) + self._compare(xt, yt, 
np.greater, math_ops.greater) + self._compare(xt, yt, np.greater_equal, math_ops.greater_equal) + self._compare(xt, yt, np.equal, math_ops.equal) + self._compare(xt, yt, np.not_equal, math_ops.not_equal) + # Complex types do not support ordering but do support equality tests. + for t in [np.complex64, np.complex128]: + xt = x.astype(t) + xt -= 1j * xt + yt = y.astype(t) + yt -= 1j * yt + self._compare(xt, yt, np.equal, math_ops.equal) + self._compare(xt, yt, np.not_equal, math_ops.not_equal) + + def _compareBCast(self, xs, ys, dtype, np_func, tf_func): + x = np.linspace(-15, 15, np.prod(xs)).astype(dtype).reshape(xs) + y = np.linspace(20, -10, np.prod(ys)).astype(dtype).reshape(ys) + if dtype in (np.complex64, np.complex128): + x -= 1j * x + y -= 1j * y + self._compare(x, y, np_func, tf_func) + self._compare(y, x, np_func, tf_func) + + def _testBCastByFunc(self, np_func, tf_func, include_complex=False): + shapes = [ + ([1, 3, 2], [1]), + ([1, 3, 2], [2]), + ([1, 3, 2], [3, 2]), + ([1, 3, 2], [3, 1]), + ([1, 3, 2], [1, 3, 2]), + ([1, 3, 2], [2, 3, 1]), + ([1, 3, 2], [2, 1, 1]), + ([1, 3, 2], [1, 3, 1]), + ([2, 1, 5], [2, 3, 1]), + ([2, 0, 5], [2, 0, 1]), + ([2, 3, 0], [2, 3, 1]), + ] + dtypes = [ + np.float16, + np.float32, + np.float64, + np.int32, + np.int64, + ] + if include_complex: + dtypes.extend([np.complex64, np.complex128]) + + for (xs, ys) in shapes: + for dtype in dtypes: + self._compareBCast(xs, ys, dtype, np_func, tf_func) + + def testBCastLess(self): + self._testBCastByFunc(np.less, math_ops.less) + + def testBCastLessEqual(self): + self._testBCastByFunc(np.less_equal, math_ops.less_equal) + + def testBCastGreater(self): + self._testBCastByFunc(np.greater, math_ops.greater) + + def testBCastGreaterEqual(self): + self._testBCastByFunc(np.greater_equal, math_ops.greater_equal) + + def testBCastEqual(self): + self._testBCastByFunc(np.equal, math_ops.equal, include_complex=True) + + def testBCastNotEqual(self): + self._testBCastByFunc( + np.not_equal, 
math_ops.not_equal, include_complex=True) + + def testShapeMismatch(self): + dtypes = [np.float16, np.float32, np.float64, np.int32, np.int64] + funcs = [ + math_ops.less, math_ops.less_equal, math_ops.greater, + math_ops.greater_equal, math_ops.equal, math_ops.not_equal + ] + x = np.arange(0, 10).reshape([2, 5]) + y = np.arange(0, 10).reshape([5, 2]) + for t in dtypes: + for f in funcs: + with self.assertRaisesWithPredicateMatch( + ValueError, lambda e: "Dimensions must" in str(e)): + f(x.astype(t), y.astype(t)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 00d7f956c2..c5311ad834 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -18,25 +18,19 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib -from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gradient_checker -from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import variables from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging _ADD = lambda x, y: x + y _SUB = lambda x, y: x - y @@ -45,8 +39,6 @@ _POW = lambda x, y: x**y _TRUEDIV = lambda x, y: x / y _FLOORDIV = lambda x, y: x // y _MOD = lambda x, y: x % y -_NEG = lambda x: -x -_ABS = abs _LT = lambda x, y: x < y _LE = 
lambda x, y: x <= y @@ -74,8 +66,11 @@ def _sparsify(x, thresh=0.5, index_dtype=np.int64): def _default_tolerance(dtype): - """Returns a sensible default tolerance for comparing results of a given - type""" + """Returns a sensible default tolerance for comparing results of a given type. + + Args: + dtype: A datatype. + """ if dtype == np.float16: return 5e-3 elif dtype in (np.float32, np.complex64): @@ -86,1147 +81,6 @@ def _default_tolerance(dtype): return None # Fail fast for unexpected types -class UnaryOpTest(test.TestCase): - - def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None): - if grad_rtol is None: - grad_rtol = _default_tolerance(x.dtype) - if grad_atol is None: - grad_atol = _default_tolerance(x.dtype) - np_ans = np_func(x) - with self.test_session(use_gpu=False): - inx = ops.convert_to_tensor(x) - if x.dtype in (np.float32, np.float64, - dtypes_lib.bfloat16.as_numpy_dtype): - y = 1.1 * tf_func(inx) - np_ans *= 1.1 - else: - y = tf_func(inx) - tf_cpu = y.eval() - self.assertShapeEqual(np_ans, y) - if x.dtype == np.float16: - self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3) - elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype: - self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2) - else: - self.assertAllClose(np_ans, tf_cpu) - - if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign: - return # Return early - - if x.dtype == np.float16: - s = list(np.shape(x)) - jacob_t, _ = gradient_checker.compute_gradient( - inx, s, y, s, x_init_value=x) - xf = x.astype(np.float) - inxf = ops.convert_to_tensor(xf) - yf = tf_func(inxf) - _, jacob_n = gradient_checker.compute_gradient( - inxf, s, yf, s, x_init_value=xf, delta=1e-2) - jacob_n = jacob_n.astype(np.float16) - self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) - elif x.dtype in (np.float32, np.complex64): - s = list(np.shape(x)) - jacob_t, jacob_n = gradient_checker.compute_gradient( - inx, s, y, s, x_init_value=x, delta=1e-3) - 
self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) - elif x.dtype in (np.float64, np.complex128): - s = list(np.shape(x)) - jacob_t, jacob_n = gradient_checker.compute_gradient( - inx, s, y, s, x_init_value=x, delta=1e-5) - self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) - - def _check(self, result_tensor, result_np, input_sp_t, tol): - self.assertTrue(isinstance(result_tensor, sparse_tensor.SparseTensor)) - self.assertTrue(isinstance(input_sp_t, sparse_tensor.SparseTensor)) - self.assertAllEqual(input_sp_t.indices.eval(), result_tensor.indices.eval()) - self.assertAllEqual(input_sp_t.dense_shape.eval(), - result_tensor.dense_shape.eval()) - if tol is None: - self.assertAllClose(result_np, result_tensor.values.eval()) - else: - self.assertAllClose( - result_np, result_tensor.values.eval(), rtol=tol, atol=tol) - - def _compareSparseCpu(self, x, np_func, tf_func, tol): - x_sp, x_sp_vals = _sparsify(x) - res_np = np_func(x_sp_vals) - with self.test_session(use_gpu=False): - self._check(tf_func(x_sp), res_np, x_sp, tol) - - def _compareGpu(self, x, np_func, tf_func): - np_ans = np_func(x) - with self.test_session(force_gpu=test_util.is_gpu_available()): - result = tf_func(ops.convert_to_tensor(x)) - tf_gpu = result.eval() - if x.dtype == np.float16: - self.assertAllClose(np_ans, tf_gpu, rtol=1e-3, atol=1e-3) - else: - self.assertAllClose(np_ans, tf_gpu) - # TODO(zhifengc/ke): make gradient checker work on GPU. 
- - def _compareSparseGpu(self, x, np_func, tf_func, tol): - x_sp, x_sp_vals = _sparsify(x) - res_np = np_func(x_sp_vals) - with self.test_session(force_gpu=test_util.is_gpu_available()): - self._check(tf_func(x_sp), res_np, x_sp, tol) - - def _compareBoth(self, x, np_func, tf_func): - self._compareCpu(x, np_func, tf_func) - self._compareGpu(x, np_func, tf_func) - - def _compareBothSparse(self, x, np_func, tf_func, tol=None): - self._compareSparseCpu(x, np_func, tf_func, tol) - self._compareSparseGpu(x, np_func, tf_func, tol) - - def _inv(self, x): - return 1.0 / x - - def _rsqrt(self, x): - return self._inv(np.sqrt(x)) - - def _sigmoid(self, x): - return 1.0 / (1.0 + np.exp(-x)) - - def _log_sigmoid(self, x): - return np.log(self._sigmoid(x)) - - def _replace_domain_error_with_inf(self, fn): - - def func(x): - try: - return fn(x) - except ValueError as e: - if "domain error" in str(e): - return np.inf * np.ones_like(x) - else: - raise e - - return func - - def testFloatBasic(self): - x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float32) - w = x - x.min() + 1.02 # all greater than 1 - y = (x + .5).astype(np.float32) # no zero - z = (x + 15.5).astype(np.float32) # all positive - k = np.arange(-0.90, 0.90, 0.25).astype(np.float32) # between -1 and 1 - - self._compareBoth(x, np.abs, math_ops.abs) - self._compareBoth(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, np.negative, _NEG) - self._compareBoth(y, self._inv, math_ops.reciprocal) - self._compareBoth(x, np.square, math_ops.square) - self._compareBoth(z, np.sqrt, math_ops.sqrt) - self._compareBoth(z, self._rsqrt, math_ops.rsqrt) - self._compareBoth(x, np.exp, math_ops.exp) - self._compareBoth(x, np.expm1, math_ops.expm1) - self._compareBoth(z, np.log, math_ops.log) - self._compareBoth(z, np.log1p, math_ops.log1p) - self._compareBoth(x, np.sinh, math_ops.sinh) - self._compareBoth(x, np.cosh, math_ops.cosh) - self._compareBoth(x, np.tanh, math_ops.tanh) - 
self._compareBoth(x, np.arcsinh, math_ops.asinh) - self._compareBoth(w, np.arccosh, math_ops.acosh) - self._compareBoth(k, np.arctanh, math_ops.atanh) - self._compareBoth(x, self._sigmoid, math_ops.sigmoid) - self._compareBoth(x, self._log_sigmoid, math_ops.log_sigmoid) - self._compareBoth(y, np.sign, math_ops.sign) - self._compareBoth(x, np.sin, math_ops.sin) - self._compareBoth(x, np.cos, math_ops.cos) - self._compareBoth(k, np.arcsin, math_ops.asin) - self._compareBoth(k, np.arccos, math_ops.acos) - self._compareBoth(x, np.arctan, math_ops.atan) - self._compareBoth(x, np.tan, math_ops.tan) - self._compareBoth(y, - np.vectorize( - self._replace_domain_error_with_inf(math.lgamma)), - math_ops.lgamma) - self._compareBoth(x, np.vectorize(math.erf), math_ops.erf) - self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc) - try: - from scipy import special # pylint: disable=g-import-not-at-top - self._compareBoth(x, special.i0e, math_ops.bessel_i0e) - self._compareBoth(x, special.i1e, math_ops.bessel_i1e) - except ImportError as e: - tf_logging.warn("Cannot test special functions: %s" % str(e)) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3) - self._compareBothSparse(x, np.tanh, math_ops.tanh) - self._compareBothSparse(y, np.sign, math_ops.sign) - self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf) - - def testFloatTanhEdge(self): - x = np.arange(40, 40 + 6).reshape(6).astype(np.float32) - self._compareBoth(x, np.tanh, math_ops.tanh) - x = np.arange(-40, -40 + 6).reshape(6).astype(np.float32) - self._compareBoth(x, np.tanh, math_ops.tanh) - - def testFloatEmpty(self): - x = np.empty((2, 0, 5), dtype=np.float32) - self._compareBoth(x, np.abs, math_ops.abs) - self._compareBoth(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, 
np.negative, _NEG) - self._compareBoth(x, self._inv, math_ops.reciprocal) - self._compareBoth(x, np.square, math_ops.square) - self._compareBoth(x, np.sqrt, math_ops.sqrt) - self._compareBoth(x, self._rsqrt, math_ops.rsqrt) - self._compareBoth(x, np.exp, math_ops.exp) - self._compareBoth(x, np.expm1, math_ops.expm1) - self._compareBoth(x, np.log, math_ops.log) - self._compareBoth(x, np.log1p, math_ops.log1p) - self._compareBoth(x, np.sinh, math_ops.sinh) - self._compareBoth(x, np.arcsinh, math_ops.asinh) - self._compareBoth(x, np.cosh, math_ops.cosh) - self._compareBoth(x, np.tanh, math_ops.tanh) - self._compareBoth(x, self._sigmoid, math_ops.sigmoid) - self._compareBoth(x, np.sign, math_ops.sign) - self._compareBoth(x, np.sin, math_ops.sin) - self._compareBoth(x, np.cos, math_ops.cos) - # Can't use vectorize below, so just use some arbitrary function - self._compareBoth(x, np.sign, math_ops.lgamma) - self._compareBoth(x, np.sign, math_ops.erf) - self._compareBoth(x, np.sign, math_ops.erfc) - self._compareBoth(x, np.tan, math_ops.tan) - self._compareBoth(x, np.arcsin, math_ops.asin) - self._compareBoth(x, np.arccos, math_ops.acos) - self._compareBoth(x, np.arctan, math_ops.atan) - try: - from scipy import special # pylint: disable=g-import-not-at-top - self._compareBoth(x, special.i0e, math_ops.bessel_i0e) - self._compareBoth(x, special.i1e, math_ops.bessel_i1e) - except ImportError as e: - tf_logging.warn("Cannot test special functions: %s" % str(e)) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(x, np.sqrt, math_ops.sqrt, tol=1e-3) - self._compareBothSparse(x, np.tanh, math_ops.tanh) - self._compareBothSparse(x, np.sign, math_ops.sign) - self._compareBothSparse(x, np.sign, math_ops.erf) - - def testDoubleBasic(self): - x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float64) - w = x - x.min() + 1.02 # all greater 
than 1 - y = (x + .5).astype(np.float64) # no zero - z = (x + 15.5).astype(np.float64) # all positive - k = np.arange(-0.90, 0.90, - 0.35).reshape(1, 3, 2).astype(np.float64) # between -1 and 1 - self._compareBoth(x, np.abs, math_ops.abs) - self._compareBoth(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, np.negative, _NEG) - self._compareBoth(y, self._inv, math_ops.reciprocal) - self._compareBoth(x, np.square, math_ops.square) - self._compareBoth(z, np.sqrt, math_ops.sqrt) - self._compareBoth(z, self._rsqrt, math_ops.rsqrt) - self._compareBoth(x, np.exp, math_ops.exp) - self._compareBoth(x, np.expm1, math_ops.expm1) - self._compareBoth(z, np.log, math_ops.log) - self._compareBoth(z, np.log1p, math_ops.log1p) - self._compareBoth(x, np.sinh, math_ops.sinh) - self._compareBoth(x, np.cosh, math_ops.cosh) - self._compareBoth(x, np.tanh, math_ops.tanh) - self._compareBoth(x, np.arcsinh, math_ops.asinh) - self._compareBoth(w, np.arccosh, math_ops.acosh) - self._compareBoth(k, np.arctanh, math_ops.atanh) - self._compareBoth(x, self._sigmoid, math_ops.sigmoid) - self._compareBoth(y, np.sign, math_ops.sign) - self._compareBoth(x, np.sin, math_ops.sin) - self._compareBoth(x, np.cos, math_ops.cos) - self._compareBoth(y, - np.vectorize( - self._replace_domain_error_with_inf(math.lgamma)), - math_ops.lgamma) - self._compareBoth(x, np.vectorize(math.erf), math_ops.erf) - self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc) - self._compareBoth(x, np.arctan, math_ops.atan) - self._compareBoth(k, np.arcsin, math_ops.asin) - self._compareBoth(k, np.arccos, math_ops.acos) - self._compareBoth(k, np.tan, math_ops.tan) - try: - from scipy import special # pylint: disable=g-import-not-at-top - self._compareBoth(x, special.i0e, math_ops.bessel_i0e) - self._compareBoth(x, special.i1e, math_ops.bessel_i1e) - except ImportError as e: - tf_logging.warn("Cannot test special functions: %s" % str(e)) - - self._compareBothSparse(x, np.abs, 
math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3) - self._compareBothSparse(x, np.tanh, math_ops.tanh) - self._compareBothSparse(y, np.sign, math_ops.sign) - self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf) - - def testHalfBasic(self): - x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float16) - y = (x + .5).astype(np.float16) # no zero - z = (x + 15.5).astype(np.float16) # all positive - self._compareBoth(x, np.abs, math_ops.abs) - self._compareBoth(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, np.negative, _NEG) - self._compareBoth(y, self._inv, math_ops.reciprocal) - self._compareBoth(x, np.square, math_ops.square) - self._compareBoth(z, np.sqrt, math_ops.sqrt) - self._compareBoth(z, self._rsqrt, math_ops.rsqrt) - self._compareBoth(x, np.exp, math_ops.exp) - self._compareBoth(x, np.expm1, math_ops.expm1) - self._compareBoth(z, np.log, math_ops.log) - self._compareBoth(z, np.log1p, math_ops.log1p) - self._compareBoth(x, np.tanh, math_ops.tanh) - self._compareBoth(x, self._sigmoid, math_ops.sigmoid) - self._compareBoth(y, np.sign, math_ops.sign) - self._compareBoth(x, np.sin, math_ops.sin) - self._compareBoth(x, np.cos, math_ops.cos) - self._compareBoth(y, - np.vectorize( - self._replace_domain_error_with_inf(math.lgamma)), - math_ops.lgamma) - self._compareBoth(x, np.vectorize(math.erf), math_ops.erf) - self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc) - try: - from scipy import special # pylint: disable=g-import-not-at-top - self._compareBoth(x, special.i0e, math_ops.bessel_i0e) - self._compareBoth(x, special.i1e, math_ops.bessel_i1e) - except ImportError as e: - tf_logging.warn("Cannot test special functions: %s" % str(e)) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - 
self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3) - self._compareBothSparse(x, np.tanh, math_ops.tanh) - self._compareBothSparse(y, np.sign, math_ops.sign) - self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3) - - def testInt32Basic(self): - x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int32) - self._compareCpu(x, np.abs, math_ops.abs) - self._compareCpu(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, np.negative, _NEG) - self._compareBoth(x, np.square, math_ops.square) - self._compareCpu(x, np.sign, math_ops.sign) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(x, np.sign, math_ops.sign) - - def testInt64Basic(self): - x = np.arange(-6 << 40, 6 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64) - self._compareCpu(x, np.abs, math_ops.abs) - self._compareCpu(x, np.abs, _ABS) - self._compareCpu(x, np.negative, math_ops.negative) - self._compareCpu(x, np.negative, _NEG) - self._compareCpu(x, np.sign, math_ops.sign) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.sign, math_ops.sign) - - def testInt64Square(self): - x = np.arange(-6 << 20, 6 << 20, 2 << 20).reshape(1, 3, 2).astype(np.int64) - self._compareCpu(x, np.square, math_ops.square) - self._compareBothSparse(x, np.square, math_ops.square) - - def testComplex64Basic(self): - x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype( - np.complex64) - y = x + np.complex(0.5, 0.5) # no zeros - self._compareBoth(x, np.abs, math_ops.abs) - self._compareBoth(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, np.negative, _NEG) - self._compareCpu(y, self._inv, math_ops.reciprocal) - 
self._compareCpu(x, np.square, math_ops.square) - self._compareCpu(y, np.sqrt, math_ops.sqrt) - self._compareCpu(y, self._rsqrt, math_ops.rsqrt) - self._compareBoth(x, np.exp, math_ops.exp) - self._compareCpu(x, np.expm1, math_ops.expm1) - self._compareCpu(y, np.log, math_ops.log) - self._compareCpu(y, np.log1p, math_ops.log1p) - self._compareCpu(x, np.sinh, math_ops.sinh) - self._compareCpu(x, np.cosh, math_ops.cosh) - self._compareCpu(x, np.tanh, math_ops.tanh) - - # Complex64 versions of asinh() and acosh() in libstdc++ only have 6 digits - # of precision. - # Small gradient values + low precision --> High relative error - self._compareCpu(y, np.arcsinh, math_ops.asinh, grad_rtol=1e-2) - self._compareCpu(y, np.arccosh, math_ops.acosh, grad_rtol=1e-2) - - self._compareCpu(y, np.arctanh, math_ops.atanh) - self._compareCpu(x, self._sigmoid, math_ops.sigmoid) - self._compareCpu(x, np.sin, math_ops.sin) - self._compareCpu(x, np.cos, math_ops.cos) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3) - self._compareBothSparse(x, np.tanh, math_ops.tanh) - - # Numpy uses an incorrect definition of sign; use the right one instead. 
- def complex_sign(x): - return x / np.abs(x) - - self._compareBoth(y, complex_sign, math_ops.sign) - self._compareBothSparse(y, complex_sign, math_ops.sign) - - def testComplex128Basic(self): - x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype( - np.complex128) - y = x + np.complex(0.5, 0.5) # no zeros - self._compareBoth(x, np.abs, math_ops.abs) - self._compareBoth(x, np.abs, _ABS) - self._compareBoth(x, np.negative, math_ops.negative) - self._compareBoth(x, np.negative, _NEG) - self._compareCpu(y, self._inv, math_ops.reciprocal) - self._compareCpu(x, np.square, math_ops.square) - self._compareCpu(y, np.sqrt, math_ops.sqrt) - self._compareCpu(y, self._rsqrt, math_ops.rsqrt) - self._compareBoth(x, np.exp, math_ops.exp) - self._compareCpu(x, np.expm1, math_ops.expm1) - self._compareCpu(y, np.log, math_ops.log) - self._compareCpu(y, np.log1p, math_ops.log1p) - self._compareCpu(x, np.sinh, math_ops.sinh) - self._compareCpu(x, np.cosh, math_ops.cosh) - self._compareCpu(x, np.tanh, math_ops.tanh) - self._compareCpu(y, np.arcsinh, math_ops.asinh) - self._compareCpu(y, np.arccosh, math_ops.acosh) - self._compareCpu(y, np.arctanh, math_ops.atanh) - self._compareCpu(x, self._sigmoid, math_ops.sigmoid) - self._compareCpu(x, np.sin, math_ops.sin) - self._compareCpu(x, np.cos, math_ops.cos) - - self._compareBothSparse(x, np.abs, math_ops.abs) - self._compareBothSparse(x, np.negative, math_ops.negative) - self._compareBothSparse(x, np.square, math_ops.square) - self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3) - self._compareBothSparse(x, np.tanh, math_ops.tanh) - - # Numpy uses an incorrect definition of sign; use the right one instead. 
- def complex_sign(x): - return x / np.abs(x) - - self._compareBoth(y, complex_sign, math_ops.sign) - self._compareBothSparse(y, complex_sign, math_ops.sign) - - def testGradGrad(self): - np.random.seed(7) - shape = (5,) - dtype_tols = [(np.float32, 5e-4), (np.float64, 1e-6), (np.complex64, 5e-4), - (np.complex128, 1e-6)] - op_range = [ - (gen_math_ops.reciprocal_grad, [-2, 2]), - (gen_math_ops.rsqrt_grad, [0.1, 3]), - (gen_math_ops.sigmoid_grad, [-2, 2]), - (gen_math_ops.sqrt_grad, [0.1, 3]), - (gen_math_ops.tanh_grad, [-2, 2]), - ] - - def rand(dtype): - x = np.random.uniform( - real_range[0], real_range[1], size=shape[0]).astype(dtype) - if dtype in (np.complex64, np.complex128): - x += 1j * np.random.uniform(-2, 2, size=shape[0]).astype(dtype) - return x - - for op, real_range in op_range: - with self.cached_session(): - for dtype, tol in dtype_tols: - x = constant_op.constant(rand(dtype)) - y = constant_op.constant(rand(dtype)) - z = op(x, y) - grads = gradient_checker.compute_gradient( - [x, y], [shape, shape], - z, - shape, - x_init_value=[rand(dtype), rand(dtype)]) - if isinstance(grads, tuple): - grads = [grads] - for analytical, numerical in grads: - self.assertAllClose(analytical, numerical, rtol=tol, atol=tol) - - -class BinaryOpTest(test.TestCase): - - def _compareCpu(self, x, y, np_func, tf_func, also_compare_variables=False): - np_ans = np_func(x, y) - with self.test_session(use_gpu=False): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - out = tf_func(inx, iny) - tf_cpu = out.eval() - # Test that the op takes precedence over numpy operators. 
- np_left = tf_func(x, iny).eval() - np_right = tf_func(inx, y).eval() - - if also_compare_variables: - var_x = variables.Variable(x) - var_y = variables.Variable(y) - variables.global_variables_initializer().run() - print(type(x), type(y), type(var_x), type(var_y)) - print(type(tf_func(x, var_y)), type(tf_func(var_x, y))) - np_var_left = tf_func(x, var_y).eval() - np_var_right = tf_func(var_x, y).eval() - - if np_ans.dtype != np.object: - self.assertAllClose(np_ans, tf_cpu) - self.assertAllClose(np_ans, np_left) - self.assertAllClose(np_ans, np_right) - if also_compare_variables: - self.assertAllClose(np_ans, np_var_left) - self.assertAllClose(np_ans, np_var_right) - self.assertShapeEqual(np_ans, out) - - _GRAD_TOL = { - dtypes_lib.float16: 1e-3, - dtypes_lib.float32: 1e-3, - dtypes_lib.complex64: 1e-2, - dtypes_lib.float64: 1e-5, - dtypes_lib.complex128: 1e-4 - } - - def _compareGradientX(self, - x, - y, - np_func, - tf_func, - numeric_gradient_type=None): - z = np_func(x, y) - zs = list(z.shape) - with self.cached_session(): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - if x.dtype in (np.float32, np.float64): - out = 1.1 * tf_func(inx, iny) - else: - out = tf_func(inx, iny) - xs = list(x.shape) - jacob_t, jacob_n = gradient_checker.compute_gradient( - inx, xs, out, zs, x_init_value=x) - if numeric_gradient_type is not None: - xf = x.astype(numeric_gradient_type) - yf = y.astype(numeric_gradient_type) - inxf = ops.convert_to_tensor(xf) - inyf = ops.convert_to_tensor(yf) - outf = tf_func(inxf, inyf) - _, jacob_n = gradient_checker.compute_gradient( - inxf, xs, outf, zs, x_init_value=xf, delta=1e-3) - jacob_n = jacob_n.astype(x.dtype) - tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)] - self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) - - def _compareGradientY(self, - x, - y, - np_func, - tf_func, - numeric_gradient_type=None): - z = np_func(x, y) - zs = list(z.shape) - with self.cached_session(): - inx = ops.convert_to_tensor(x) - 
iny = ops.convert_to_tensor(y) - if x.dtype in (np.float32, np.float64): - out = 1.1 * tf_func(inx, iny) - else: - out = tf_func(inx, iny) - ys = list(np.shape(y)) - jacob_t, jacob_n = gradient_checker.compute_gradient( - iny, ys, out, zs, x_init_value=y) - if numeric_gradient_type is not None: - xf = x.astype(numeric_gradient_type) - yf = y.astype(numeric_gradient_type) - inxf = ops.convert_to_tensor(xf) - inyf = ops.convert_to_tensor(yf) - outf = tf_func(inxf, inyf) - _, jacob_n = gradient_checker.compute_gradient( - inyf, ys, outf, zs, x_init_value=yf) - jacob_n = jacob_n.astype(x.dtype) - tol = self._GRAD_TOL[dtypes_lib.as_dtype(x.dtype)] - self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) - - def _compareGpu(self, x, y, np_func, tf_func): - np_ans = np_func(x, y) - with self.test_session(force_gpu=test_util.is_gpu_available()): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - out = tf_func(inx, iny) - tf_gpu = out.eval() - self.assertAllClose(np_ans, tf_gpu) - self.assertShapeEqual(np_ans, out) - # TODO(zhifengc/ke): make gradient checker work on GPU. 
- - def _compareBoth(self, x, y, np_func, tf_func, also_compare_variables=False): - self._compareCpu(x, y, np_func, tf_func, also_compare_variables) - if x.dtype in (np.float16, np.float32, np.float64, np.complex64, - np.complex128): - if tf_func not in (_FLOORDIV, math_ops.floordiv, math_ops.zeta, - math_ops.polygamma): - self._compareGradientX(x, y, np_func, tf_func) - self._compareGradientY(x, y, np_func, tf_func) - if tf_func in (math_ops.zeta, math_ops.polygamma): - # These methods only support gradients in the second parameter - self._compareGradientY(x, y, np_func, tf_func) - self._compareGpu(x, y, np_func, tf_func) - - def testFloatBasic(self): - x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float32) - y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float32) - self._compareBoth(x, y, np.add, math_ops.add, also_compare_variables=True) - self._compareBoth(x, y, np.subtract, math_ops.subtract) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) - self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv) - self._compareBoth(x, y, np.add, _ADD) - self._compareBoth(x, y, np.subtract, _SUB) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) - self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV) - self._compareBoth(x, y, np.arctan2, math_ops.atan2) - x1 = np.random.randn(5, 6).astype(np.float32) - x2 = np.random.randn(5, 6).astype(np.float32) - # Remove tiny values--atan2 gradients are flaky near the origin. 
- x1[np.abs(x1) < 0.05] = 0.05 * np.sign(x1[np.abs(x1) < 0.05]) - x2[np.abs(x2) < 0.05] = 0.05 * np.sign(x2[np.abs(x2) < 0.05]) - self._compareBoth(x1, x2, np.arctan2, math_ops.atan2) - try: - from scipy import special # pylint: disable=g-import-not-at-top - a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32) - x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32) - self._compareBoth(a_pos_small, x_pos_small, special.gammainc, - math_ops.igamma) - self._compareBoth(a_pos_small, x_pos_small, special.gammaincc, - math_ops.igammac) - # Need x > 1 - self._compareBoth(x_pos_small + 1, a_pos_small, special.zeta, - math_ops.zeta) - n_small = np.arange(0, 15).reshape(1, 3, 5).astype(np.float32) - self._compareBoth(n_small, x_pos_small, special.polygamma, - math_ops.polygamma) - except ImportError as e: - tf_logging.warn("Cannot test special functions: %s" % str(e)) - - def testFloatDifferentShapes(self): - x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.float32) - y = np.array([1, 2]).reshape(2, 1).astype(np.float32) - with self.cached_session() as sess: - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - s = math_ops.reduce_sum(inx * iny) - gx, gy = sess.run(gradients_impl.gradients(s, [inx, iny])) - # gx is simply the broadcasted y - self.assertAllEqual(gx, - np.array([1, 1, 2, 2]).reshape(2, 2).astype(np.float32)) - # gy is x's column summed up - self.assertAllEqual(gy, np.array([3, 7]).reshape(2, 1).astype(np.float32)) - - def testFloatVariableOverload(self): - x = np.array([1, 2, 3, 4]).reshape(2, 2).astype(np.int32) - y = np.array([1, 2]).reshape(2, 1).astype(np.int32) - var_x = variables.Variable(x) - var_y = variables.Variable(y) - with self.cached_session() as sess: - sess.run([var_x.initializer, var_y.initializer]) - left_result = (var_x * y).eval() - right_result = (x * var_y).eval() - np_result = x * y - self.assertAllEqual(np_result, left_result) - self.assertAllEqual(np_result, right_result) - - 
def testDoubleBasic(self): - x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float64) - y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float64) - self._compareBoth(x, y, np.add, math_ops.add) - self._compareBoth(x, y, np.subtract, math_ops.subtract) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) - self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv) - self._compareBoth(x, y, np.add, _ADD) - self._compareBoth(x, y, np.subtract, _SUB) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) - self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV) - self._compareBoth(x, y, np.arctan2, math_ops.atan2) - x1 = np.random.randn(7, 4).astype(np.float64) - x2 = np.random.randn(7, 4).astype(np.float64) - # Remove tiny values--atan2 gradients are flaky near the origin. - x1[np.abs(x1) < 0.5] = 0.5 * np.sign(x1[np.abs(x1) < 0.5]) - x2[np.abs(x2) < 0.5] = 0.5 * np.sign(x2[np.abs(x2) < 0.5]) - self._compareBoth(x1, x2, np.arctan2, math_ops.atan2) - try: - from scipy import special # pylint: disable=g-import-not-at-top - a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32) - x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32) - self._compareBoth(a_pos_small, x_pos_small, special.gammainc, - math_ops.igamma) - self._compareBoth(a_pos_small, x_pos_small, special.gammaincc, - math_ops.igammac) - except ImportError as e: - tf_logging.warn("Cannot test special functions: %s" % str(e)) - - def testUint8Basic(self): - x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint8) - y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint8) - self._compareBoth(x, y, np.add, math_ops.add) - - def testInt8Basic(self): - x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int8) - y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int8) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - 
self._compareBoth(x, y, np.multiply, _MUL) - - def testInt16Basic(self): - x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int16) - y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int16) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y, np.multiply, _MUL) - - def testUint16Basic(self): - x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.uint16) - y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.uint16) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y, np.true_divide, math_ops.truediv) - self._compareBoth(x, y, np.floor_divide, math_ops.floordiv) - self._compareBoth(x, y, np.true_divide, _TRUEDIV) - self._compareBoth(x, y, np.floor_divide, _FLOORDIV) - - def testInt32Basic(self): - x = np.arange(1, 13, 2).reshape(1, 3, 2).astype(np.int32) - y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int32) - self._compareBoth(x, y, np.add, math_ops.add) - self._compareBoth(x, y, np.subtract, math_ops.subtract) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y, np.true_divide, math_ops.truediv) - self._compareBoth(x, y, np.floor_divide, math_ops.floordiv) - self._compareBoth(x, y, np.mod, math_ops.mod) - self._compareBoth(x, y, np.add, _ADD) - self._compareBoth(x, y, np.subtract, _SUB) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y, np.true_divide, _TRUEDIV) - self._compareBoth(x, y, np.floor_divide, _FLOORDIV) - self._compareBoth(x, y, np.mod, _MOD) - # _compareBoth tests on GPU only for floating point types, so test - # _MOD for int32 on GPU by calling _compareGpu - self._compareGpu(x, y, np.mod, _MOD) - - def testInt64Basic(self): - x = np.arange(1 << 40, 13 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64) - y = np.arange(1, 7, 1).reshape(1, 3, 2).astype(np.int64) - self._compareBoth(x, y, np.subtract, math_ops.subtract) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, 
y, np.true_divide, math_ops.truediv) - self._compareBoth(x, y, np.floor_divide, math_ops.floordiv) - self._compareBoth(x, y, np.mod, math_ops.mod) - self._compareBoth(x, y, np.subtract, _SUB) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y, np.true_divide, _TRUEDIV) - self._compareBoth(x, y, np.floor_divide, _FLOORDIV) - self._compareBoth(x, y, np.mod, _MOD) - - def testComplex64Basic(self): - x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype( - np.complex64) - y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype( - np.complex64) - self._compareBoth(x, y, np.add, math_ops.add) - self._compareBoth(x, y, np.subtract, math_ops.subtract) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) - self._compareBoth(x, y, np.add, _ADD) - self._compareBoth(x, y, np.subtract, _SUB) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) - - def testComplex128Basic(self): - x = np.complex(1, 1) * np.linspace(-10, 10, 6).reshape(1, 3, 2).astype( - np.complex128) - y = np.complex(1, 1) * np.linspace(20, -20, 6).reshape(1, 3, 2).astype( - np.complex128) - self._compareBoth(x, y, np.add, math_ops.add) - self._compareBoth(x, y, np.subtract, math_ops.subtract) - self._compareBoth(x, y, np.multiply, math_ops.multiply) - self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv) - self._compareBoth(x, y, np.add, _ADD) - self._compareBoth(x, y, np.subtract, _SUB) - self._compareBoth(x, y, np.multiply, _MUL) - self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV) - - def testStringComparison(self): - x = np.array([["abc", "bh"], ["c", ""]]) - y = np.array([["abc", "bh"], ["def", "hi"]]) - with self.test_session(use_gpu=False) as sess: - cmp_eq = math_ops.equal(x, y) - cmp_not_eq = math_ops.not_equal(x, y) - values = sess.run([cmp_eq, cmp_not_eq]) - self.assertAllEqual([[True, True], [False, False]], 
values[0]) - self.assertAllEqual([[False, False], [True, True]], values[1]) - - def testString(self): - x = np.array( - [["x_0_0", "x_0_1", "x_0_2"], ["x_1_0", "x_1_1", "x_1_2"], - ["x_2_0", "x_2_1", "x_2_2"]], - dtype=np.object) - y = np.array( - [["y_0_0", "y_0_1", "y_0_2"], ["y_1_0", "y_1_1", "y_1_2"], - ["y_2_0", "y_2_1", "y_2_2"]], - dtype=np.object) - z = np.array([["z_0", "z_1", "z_2"]], dtype=np.object) - w = np.array("w", dtype=np.object) - self._compareCpu(x, y, _ADD, _ADD) - self._compareCpu(x, z, _ADD, _ADD) - self._compareCpu(x, w, _ADD, _ADD) - self._compareCpu(z, w, _ADD, _ADD) - - def _compareBCast(self, xs, ys, dtype, np_func, tf_func): - if dtype in (np.complex64, np.complex128): - x = (1 + np.linspace(0, 2 + 3j, np.prod(xs))).astype(dtype).reshape(xs) - y = (1 + np.linspace(0, 2 - 2j, np.prod(ys))).astype(dtype).reshape(ys) - else: - x = (1 + np.linspace(0, 5, np.prod(xs))).astype(dtype).reshape(xs) - y = (1 + np.linspace(0, 5, np.prod(ys))).astype(dtype).reshape(ys) - self._compareCpu(x, y, np_func, tf_func) - if x.dtype in (np.float16, np.float32, np.float64): - # TODO(aselle): Make the test work for dtypes: - # (np.complex64, np.complex128). - if tf_func not in (_FLOORDIV, math_ops.floordiv): - if x.dtype == np.float16: - # Compare fp16 theoretical gradients to fp32 numerical gradients, - # since fp16 numerical gradients are too imprecise unless great - # care is taken with choosing the inputs and the delta. This is - # a weaker check (in particular, it does not test the op itself, - # only its gradient), but it's much better than nothing. - self._compareGradientX(x, y, np_func, tf_func, np.float) - self._compareGradientY(x, y, np_func, tf_func, np.float) - else: - self._compareGradientX(x, y, np_func, tf_func) - self._compareGradientY(x, y, np_func, tf_func) - self._compareGpu(x, y, np_func, tf_func) - - # TODO(josh11b,vrv): Refactor this to use parameterized tests. 
- def _testBCastByFunc(self, funcs, xs, ys): - dtypes = [ - np.float16, - np.float32, - np.float64, - np.int32, - np.int64, - np.complex64, - np.complex128, - ] - for dtype in dtypes: - for (np_func, tf_func) in funcs: - if (dtype in (np.complex64, np.complex128) and - tf_func in (_FLOORDIV, math_ops.floordiv)): - continue # floordiv makes no sense for complex numbers - self._compareBCast(xs, ys, dtype, np_func, tf_func) - self._compareBCast(ys, xs, dtype, np_func, tf_func) - - def _testBCastA(self, xs, ys): - funcs = [ - (np.add, math_ops.add), - (np.add, _ADD), - ] - self._testBCastByFunc(funcs, xs, ys) - - def _testBCastB(self, xs, ys): - funcs = [ - (np.subtract, math_ops.subtract), - (np.subtract, _SUB), - (np.power, math_ops.pow), - ] - self._testBCastByFunc(funcs, xs, ys) - - def _testBCastC(self, xs, ys): - funcs = [ - (np.multiply, math_ops.multiply), - (np.multiply, _MUL), - ] - self._testBCastByFunc(funcs, xs, ys) - - def _testBCastD(self, xs, ys): - funcs = [ - (np.true_divide, math_ops.truediv), - (np.floor_divide, math_ops.floordiv), - (np.true_divide, _TRUEDIV), - (np.floor_divide, _FLOORDIV), - ] - self._testBCastByFunc(funcs, xs, ys) - - def testBCast_0A(self): - self._testBCastA([1, 3, 2], [1]) - - def testBCast_0B(self): - self._testBCastB([1, 3, 2], [1]) - - def testBCast_0C(self): - self._testBCastC([1, 3, 2], [1]) - - def testBCast_0D(self): - self._testBCastD([1, 3, 2], [1]) - - def testBCast_1A(self): - self._testBCastA([1, 3, 2], [2]) - - def testBCast_1B(self): - self._testBCastB([1, 3, 2], [2]) - - def testBCast_1C(self): - self._testBCastC([1, 3, 2], [2]) - - def testBCast_1D(self): - self._testBCastD([1, 3, 2], [2]) - - def testBCast_2A(self): - self._testBCastA([1, 3, 2], [3, 2]) - - def testBCast_2B(self): - self._testBCastB([1, 3, 2], [3, 2]) - - def testBCast_2C(self): - self._testBCastC([1, 3, 2], [3, 2]) - - def testBCast_2D(self): - self._testBCastD([1, 3, 2], [3, 2]) - - def testBCast_3A(self): - self._testBCastA([1, 3, 2], [3, 
1]) - - def testBCast_3B(self): - self._testBCastB([1, 3, 2], [3, 1]) - - def testBCast_3C(self): - self._testBCastC([1, 3, 2], [3, 1]) - - def testBCast_3D(self): - self._testBCastD([1, 3, 2], [3, 1]) - - def testBCast_4A(self): - self._testBCastA([1, 3, 2], [1, 3, 2]) - - def testBCast_4B(self): - self._testBCastB([1, 3, 2], [1, 3, 2]) - - def testBCast_4C(self): - self._testBCastC([1, 3, 2], [1, 3, 2]) - - def testBCast_4D(self): - self._testBCastD([1, 3, 2], [1, 3, 2]) - - def testBCast_5A(self): - self._testBCastA([1, 3, 2], [2, 3, 1]) - - def testBCast_5B(self): - self._testBCastB([1, 3, 2], [2, 3, 1]) - - def testBCast_5C(self): - self._testBCastC([1, 3, 2], [2, 3, 1]) - - def testBCast_5D(self): - self._testBCastD([1, 3, 2], [2, 3, 1]) - - def testBCast_6A(self): - self._testBCastA([1, 3, 2], [2, 1, 1]) - - def testBCast_6B(self): - self._testBCastB([1, 3, 2], [2, 1, 1]) - - def testBCast_6C(self): - self._testBCastC([1, 3, 2], [2, 1, 1]) - - def testBCast_6D(self): - self._testBCastD([1, 3, 2], [2, 1, 1]) - - def testBCast_7A(self): - self._testBCastA([1, 3, 2], [1, 3, 1]) - - def testBCast_7B(self): - self._testBCastB([1, 3, 2], [1, 3, 1]) - - def testBCast_7C(self): - self._testBCastC([1, 3, 2], [1, 3, 1]) - - def testBCast_7D(self): - self._testBCastD([1, 3, 2], [1, 3, 1]) - - def testBCast_8A(self): - self._testBCastA([2, 1, 5], [2, 3, 1]) - - def testBCast_8B(self): - self._testBCastB([2, 1, 5], [2, 3, 1]) - - def testBCast_8C(self): - self._testBCastC([2, 1, 5], [2, 3, 1]) - - def testBCast_8D(self): - self._testBCastD([2, 1, 5], [2, 3, 1]) - - def testBCast_9A(self): - self._testBCastA([2, 0, 5], [2, 0, 1]) - - def testBCast_9B(self): - self._testBCastB([2, 0, 5], [2, 0, 1]) - - def testBCast_9C(self): - self._testBCastC([2, 0, 5], [2, 0, 1]) - - def testBCast_9D(self): - self._testBCastD([2, 0, 5], [2, 0, 1]) - - def testBCast_10A(self): - self._testBCastA([2, 3, 0], [2, 3, 1]) - - def testBCast_10B(self): - self._testBCastB([2, 3, 0], [2, 3, 1]) - 
- def testBCast_10C(self): - self._testBCastC([2, 3, 0], [2, 3, 1]) - - def testBCast_10D(self): - self._testBCastD([2, 3, 0], [2, 3, 1]) - - def testBCast_11A(self): - self._testBCastA([1, 3, 2], [1, 3, 2]) - - def testBCast_11B(self): - self._testBCastB([1, 3, 2], [1, 3, 2]) - - def testBCast_11C(self): - self._testBCastC([1, 3, 2], [1, 3, 2]) - - def testBCast_11D(self): - self._testBCastD([1, 3, 2], [1, 3, 2]) - - def testBCast_12A(self): - self._testBCastA([1, 1, 1, 1, 3, 2], [1, 3, 2]) - - def testBCast_12B(self): - self._testBCastB([1, 1, 1, 1, 3, 2], [1, 3, 2]) - - def testBCast_12C(self): - self._testBCastC([1, 1, 1, 1, 3, 2], [1, 3, 2]) - - def testBCast_12D(self): - self._testBCastD([1, 1, 1, 1, 3, 2], [1, 3, 2]) - - def testBCast_13A(self): - self._testBCastA([1, 3, 2, 1, 1], [1]) - - def testBCast_13B(self): - self._testBCastB([1, 3, 2, 1, 1], [1]) - - def testBCast_13C(self): - self._testBCastC([1, 3, 2, 1, 1], [1]) - - def testBCast_13D(self): - self._testBCastD([1, 3, 2, 1, 1], [1]) - - def testBCast_14A(self): - self._testBCastA([2, 3, 1, 1, 5], [1]) - - def testBCast_14B(self): - self._testBCastB([2, 3, 1, 1, 5], [1]) - - def testBCast_14C(self): - self._testBCastC([2, 3, 1, 1, 5], [1]) - - def testBCast_14D(self): - self._testBCastD([2, 3, 1, 1, 5], [1]) - - def testBCast_15A(self): - self._testBCastA([10, 3, 1, 2], [3, 1, 2]) - - def testBCast_15B(self): - self._testBCastB([10, 3, 1, 2], [3, 1, 2]) - - def testBCast_15C(self): - self._testBCastC([10, 3, 1, 2], [3, 1, 2]) - - def testBCast_15D(self): - self._testBCastD([10, 3, 1, 2], [3, 1, 2]) - - def testMismatchedDimensions(self): - for func in [ - math_ops.add, math_ops.subtract, math_ops.multiply, math_ops.div, _ADD, - _SUB, _MUL, _TRUEDIV, _FLOORDIV - ]: - with self.assertRaisesWithPredicateMatch( - ValueError, lambda e: "Dimensions must" in str(e)): - func( - ops.convert_to_tensor([10.0, 20.0, 30.0]), - ops.convert_to_tensor([[40.0, 50.0], [60.0, 70.0]])) - - def testZeroPowGrad(self): - 
with self.cached_session(): - for dtype in (np.float16, np.float32, np.float64, np.complex64, - np.complex128): - x = constant_op.constant(0.0, dtype=dtype) - y = constant_op.constant(2.0, dtype=dtype) - z = math_ops.pow(x, y) - error = gradient_checker.compute_gradient_error(y, [], z, []) - self.assertEqual(error, 0) - - def testComplexPowGrad(self): - with self.cached_session(): - for dtype in np.complex64, np.complex128: - for base in 2.0, -2.0: - x = constant_op.constant(base, dtype=dtype) - y = constant_op.constant(2.0, dtype=dtype) - z = math_ops.pow(x, y) - error = gradient_checker.compute_gradient_error(y, [], z, []) - self.assertLess(error, 2e-4) - - def testAtan2SpecialValues(self): - x1l, x2l = zip((+0.0, +0.0), (+0.0, -0.0), (-0.0, +0.0), (-0.0, -0.0), - (1.2345, float("inf")), (1.2345, -float("inf")), - (-4.321, float("inf")), (-4.125, -float("inf")), - (float("inf"), float("inf")), (float("inf"), -float("inf")), - (-float("inf"), float("inf")), - (-float("inf"), -float("inf"))) - for dtype in np.float32, np.float64: - x1 = np.array(x1l).astype(dtype) - x2 = np.array(x2l).astype(dtype) - self._compareCpu(x1, x2, np.arctan2, math_ops.atan2) - self._compareGpu(x1, x2, np.arctan2, math_ops.atan2) - - def testPowNegativeExponent(self): - for dtype in [np.int32, np.int64]: - with self.test_session(use_gpu=False) as sess: - with self.assertRaisesRegexp( - errors_impl.InvalidArgumentError, - "Integers to negative integer powers are not allowed"): - x = np.array([5, 2]).astype(dtype) - y = np.array([-2, 3]).astype(dtype) - sess.run(math_ops.pow(x, y)) - - with self.test_session(use_gpu=False) as sess: - with self.assertRaisesRegexp( - errors_impl.InvalidArgumentError, - "Integers to negative integer powers are not allowed"): - x = np.array([5, 2]).astype(dtype) - y = np.array([2, -3]).astype(dtype) - sess.run(math_ops.pow(x, y)) - - with self.test_session(use_gpu=False) as sess: - with self.assertRaisesRegexp( - errors_impl.InvalidArgumentError, - "Integers to 
negative integer powers are not allowed"): - x = np.array([5, 2]).astype(dtype) - y = -3 - sess.run(math_ops.pow(x, y)) - - class ComparisonOpTest(test.TestCase): def _compareScalar(self, func, x, y, dtype): diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py new file mode 100644 index 0000000000..77f182784e --- /dev/null +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -0,0 +1,541 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Functional tests for unary coefficient-wise operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_grad # pylint: disable=unused-import +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + +_NEG = lambda x: -x +_ABS = abs + + +# TODO(zongheng): it'd be great to factor out this function and various random +# SparseTensor gen funcs. +def _sparsify(x, thresh=0.5, index_dtype=np.int64): + x[x < thresh] = 0 + + non_zero = np.where(x) + x_indices = np.vstack(non_zero).astype(index_dtype).T + x_values = x[non_zero] + x_shape = x.shape + + return sparse_tensor.SparseTensor( + indices=x_indices, values=x_values, dense_shape=x_shape), x_values + + +def _default_tolerance(dtype): + """Returns a sensible default tolerance for comparing results of a given type. + + Args: + dtype: A datatype. 
+ """ + if dtype == np.float16: + return 5e-3 + elif dtype in (np.float32, np.complex64): + return 1e-3 + elif dtype in (np.float64, np.complex128): + return 1e-5 + else: + return None # Fail fast for unexpected types + + +class UnaryOpTest(test.TestCase): + + def _compareCpu(self, x, np_func, tf_func, grad_rtol=None, grad_atol=None): + if grad_rtol is None: + grad_rtol = _default_tolerance(x.dtype) + if grad_atol is None: + grad_atol = _default_tolerance(x.dtype) + np_ans = np_func(x) + with self.test_session(use_gpu=False): + inx = ops.convert_to_tensor(x) + if x.dtype in (np.float32, np.float64, + dtypes_lib.bfloat16.as_numpy_dtype): + y = 1.1 * tf_func(inx) + np_ans *= 1.1 + else: + y = tf_func(inx) + tf_cpu = y.eval() + self.assertShapeEqual(np_ans, y) + if x.dtype == np.float16: + self.assertAllClose(np_ans, tf_cpu, rtol=1e-3, atol=1e-3) + elif x.dtype == dtypes_lib.bfloat16.as_numpy_dtype: + self.assertAllClose(np_ans, tf_cpu, rtol=1e-2, atol=1e-2) + else: + self.assertAllClose(np_ans, tf_cpu) + + if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign: + return # Return early + + if x.dtype == np.float16: + s = list(np.shape(x)) + jacob_t, _ = gradient_checker.compute_gradient( + inx, s, y, s, x_init_value=x) + xf = x.astype(np.float) + inxf = ops.convert_to_tensor(xf) + yf = tf_func(inxf) + _, jacob_n = gradient_checker.compute_gradient( + inxf, s, yf, s, x_init_value=xf, delta=1e-2) + jacob_n = jacob_n.astype(np.float16) + self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) + elif x.dtype in (np.float32, np.complex64): + s = list(np.shape(x)) + jacob_t, jacob_n = gradient_checker.compute_gradient( + inx, s, y, s, x_init_value=x, delta=1e-3) + self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) + elif x.dtype in (np.float64, np.complex128): + s = list(np.shape(x)) + jacob_t, jacob_n = gradient_checker.compute_gradient( + inx, s, y, s, x_init_value=x, delta=1e-5) + self.assertAllClose(jacob_t, jacob_n, 
rtol=grad_rtol, atol=grad_atol) + + def _check(self, result_tensor, result_np, input_sp_t, tol): + self.assertTrue(isinstance(result_tensor, sparse_tensor.SparseTensor)) + self.assertTrue(isinstance(input_sp_t, sparse_tensor.SparseTensor)) + self.assertAllEqual(input_sp_t.indices.eval(), result_tensor.indices.eval()) + self.assertAllEqual(input_sp_t.dense_shape.eval(), + result_tensor.dense_shape.eval()) + if tol is None: + self.assertAllClose(result_np, result_tensor.values.eval()) + else: + self.assertAllClose( + result_np, result_tensor.values.eval(), rtol=tol, atol=tol) + + def _compareSparseCpu(self, x, np_func, tf_func, tol): + x_sp, x_sp_vals = _sparsify(x) + res_np = np_func(x_sp_vals) + with self.test_session(use_gpu=False): + self._check(tf_func(x_sp), res_np, x_sp, tol) + + def _compareGpu(self, x, np_func, tf_func): + np_ans = np_func(x) + with self.test_session(force_gpu=test_util.is_gpu_available()): + result = tf_func(ops.convert_to_tensor(x)) + tf_gpu = result.eval() + if x.dtype == np.float16: + self.assertAllClose(np_ans, tf_gpu, rtol=1e-3, atol=1e-3) + else: + self.assertAllClose(np_ans, tf_gpu) + # TODO(zhifengc/ke): make gradient checker work on GPU. 
+ + def _compareSparseGpu(self, x, np_func, tf_func, tol): + x_sp, x_sp_vals = _sparsify(x) + res_np = np_func(x_sp_vals) + with self.test_session(force_gpu=test_util.is_gpu_available()): + self._check(tf_func(x_sp), res_np, x_sp, tol) + + def _compareBoth(self, x, np_func, tf_func): + self._compareCpu(x, np_func, tf_func) + self._compareGpu(x, np_func, tf_func) + + def _compareBothSparse(self, x, np_func, tf_func, tol=None): + self._compareSparseCpu(x, np_func, tf_func, tol) + self._compareSparseGpu(x, np_func, tf_func, tol) + + def _inv(self, x): + return 1.0 / x + + def _rsqrt(self, x): + return self._inv(np.sqrt(x)) + + def _sigmoid(self, x): + return 1.0 / (1.0 + np.exp(-x)) + + def _log_sigmoid(self, x): + return np.log(self._sigmoid(x)) + + def _replace_domain_error_with_inf(self, fn): + + def func(x): + try: + return fn(x) + except ValueError as e: + if "domain error" in str(e): + return np.inf * np.ones_like(x) + else: + raise e + + return func + + def testFloatBasic(self): + x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float32) + w = x - x.min() + 1.02 # all greater than 1 + y = (x + .5).astype(np.float32) # no zero + z = (x + 15.5).astype(np.float32) # all positive + k = np.arange(-0.90, 0.90, 0.25).astype(np.float32) # between -1 and 1 + + self._compareBoth(x, np.abs, math_ops.abs) + self._compareBoth(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, np.negative, _NEG) + self._compareBoth(y, self._inv, math_ops.reciprocal) + self._compareBoth(x, np.square, math_ops.square) + self._compareBoth(z, np.sqrt, math_ops.sqrt) + self._compareBoth(z, self._rsqrt, math_ops.rsqrt) + self._compareBoth(x, np.exp, math_ops.exp) + self._compareBoth(x, np.expm1, math_ops.expm1) + self._compareBoth(z, np.log, math_ops.log) + self._compareBoth(z, np.log1p, math_ops.log1p) + self._compareBoth(x, np.sinh, math_ops.sinh) + self._compareBoth(x, np.cosh, math_ops.cosh) + self._compareBoth(x, np.tanh, math_ops.tanh) + 
self._compareBoth(x, np.arcsinh, math_ops.asinh) + self._compareBoth(w, np.arccosh, math_ops.acosh) + self._compareBoth(k, np.arctanh, math_ops.atanh) + self._compareBoth(x, self._sigmoid, math_ops.sigmoid) + self._compareBoth(x, self._log_sigmoid, math_ops.log_sigmoid) + self._compareBoth(y, np.sign, math_ops.sign) + self._compareBoth(x, np.sin, math_ops.sin) + self._compareBoth(x, np.cos, math_ops.cos) + self._compareBoth(k, np.arcsin, math_ops.asin) + self._compareBoth(k, np.arccos, math_ops.acos) + self._compareBoth(x, np.arctan, math_ops.atan) + self._compareBoth(x, np.tan, math_ops.tan) + self._compareBoth( + y, np.vectorize(self._replace_domain_error_with_inf(math.lgamma)), + math_ops.lgamma) + self._compareBoth(x, np.vectorize(math.erf), math_ops.erf) + self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc) + try: + from scipy import special # pylint: disable=g-import-not-at-top + self._compareBoth(x, special.i0e, math_ops.bessel_i0e) + self._compareBoth(x, special.i1e, math_ops.bessel_i1e) + except ImportError as e: + tf_logging.warn("Cannot test special functions: %s" % str(e)) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3) + self._compareBothSparse(x, np.tanh, math_ops.tanh) + self._compareBothSparse(y, np.sign, math_ops.sign) + self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf) + + def testFloatTanhEdge(self): + x = np.arange(40, 40 + 6).reshape(6).astype(np.float32) + self._compareBoth(x, np.tanh, math_ops.tanh) + x = np.arange(-40, -40 + 6).reshape(6).astype(np.float32) + self._compareBoth(x, np.tanh, math_ops.tanh) + + def testFloatEmpty(self): + x = np.empty((2, 0, 5), dtype=np.float32) + self._compareBoth(x, np.abs, math_ops.abs) + self._compareBoth(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, 
np.negative, _NEG) + self._compareBoth(x, self._inv, math_ops.reciprocal) + self._compareBoth(x, np.square, math_ops.square) + self._compareBoth(x, np.sqrt, math_ops.sqrt) + self._compareBoth(x, self._rsqrt, math_ops.rsqrt) + self._compareBoth(x, np.exp, math_ops.exp) + self._compareBoth(x, np.expm1, math_ops.expm1) + self._compareBoth(x, np.log, math_ops.log) + self._compareBoth(x, np.log1p, math_ops.log1p) + self._compareBoth(x, np.sinh, math_ops.sinh) + self._compareBoth(x, np.arcsinh, math_ops.asinh) + self._compareBoth(x, np.cosh, math_ops.cosh) + self._compareBoth(x, np.tanh, math_ops.tanh) + self._compareBoth(x, self._sigmoid, math_ops.sigmoid) + self._compareBoth(x, np.sign, math_ops.sign) + self._compareBoth(x, np.sin, math_ops.sin) + self._compareBoth(x, np.cos, math_ops.cos) + # Can't use vectorize below, so just use some arbitrary function + self._compareBoth(x, np.sign, math_ops.lgamma) + self._compareBoth(x, np.sign, math_ops.erf) + self._compareBoth(x, np.sign, math_ops.erfc) + self._compareBoth(x, np.tan, math_ops.tan) + self._compareBoth(x, np.arcsin, math_ops.asin) + self._compareBoth(x, np.arccos, math_ops.acos) + self._compareBoth(x, np.arctan, math_ops.atan) + try: + from scipy import special # pylint: disable=g-import-not-at-top + self._compareBoth(x, special.i0e, math_ops.bessel_i0e) + self._compareBoth(x, special.i1e, math_ops.bessel_i1e) + except ImportError as e: + tf_logging.warn("Cannot test special functions: %s" % str(e)) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(x, np.sqrt, math_ops.sqrt, tol=1e-3) + self._compareBothSparse(x, np.tanh, math_ops.tanh) + self._compareBothSparse(x, np.sign, math_ops.sign) + self._compareBothSparse(x, np.sign, math_ops.erf) + + def testDoubleBasic(self): + x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float64) + w = x - x.min() + 1.02 # all greater 
than 1 + y = (x + .5).astype(np.float64) # no zero + z = (x + 15.5).astype(np.float64) # all positive + k = np.arange(-0.90, 0.90, + 0.35).reshape(1, 3, 2).astype(np.float64) # between -1 and 1 + self._compareBoth(x, np.abs, math_ops.abs) + self._compareBoth(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, np.negative, _NEG) + self._compareBoth(y, self._inv, math_ops.reciprocal) + self._compareBoth(x, np.square, math_ops.square) + self._compareBoth(z, np.sqrt, math_ops.sqrt) + self._compareBoth(z, self._rsqrt, math_ops.rsqrt) + self._compareBoth(x, np.exp, math_ops.exp) + self._compareBoth(x, np.expm1, math_ops.expm1) + self._compareBoth(z, np.log, math_ops.log) + self._compareBoth(z, np.log1p, math_ops.log1p) + self._compareBoth(x, np.sinh, math_ops.sinh) + self._compareBoth(x, np.cosh, math_ops.cosh) + self._compareBoth(x, np.tanh, math_ops.tanh) + self._compareBoth(x, np.arcsinh, math_ops.asinh) + self._compareBoth(w, np.arccosh, math_ops.acosh) + self._compareBoth(k, np.arctanh, math_ops.atanh) + self._compareBoth(x, self._sigmoid, math_ops.sigmoid) + self._compareBoth(y, np.sign, math_ops.sign) + self._compareBoth(x, np.sin, math_ops.sin) + self._compareBoth(x, np.cos, math_ops.cos) + self._compareBoth( + y, np.vectorize(self._replace_domain_error_with_inf(math.lgamma)), + math_ops.lgamma) + self._compareBoth(x, np.vectorize(math.erf), math_ops.erf) + self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc) + self._compareBoth(x, np.arctan, math_ops.atan) + self._compareBoth(k, np.arcsin, math_ops.asin) + self._compareBoth(k, np.arccos, math_ops.acos) + self._compareBoth(k, np.tan, math_ops.tan) + try: + from scipy import special # pylint: disable=g-import-not-at-top + self._compareBoth(x, special.i0e, math_ops.bessel_i0e) + self._compareBoth(x, special.i1e, math_ops.bessel_i1e) + except ImportError as e: + tf_logging.warn("Cannot test special functions: %s" % str(e)) + + self._compareBothSparse(x, np.abs, 
math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3) + self._compareBothSparse(x, np.tanh, math_ops.tanh) + self._compareBothSparse(y, np.sign, math_ops.sign) + self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf) + + def testHalfBasic(self): + x = np.arange(-3, 3).reshape(1, 3, 2).astype(np.float16) + y = (x + .5).astype(np.float16) # no zero + z = (x + 15.5).astype(np.float16) # all positive + self._compareBoth(x, np.abs, math_ops.abs) + self._compareBoth(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, np.negative, _NEG) + self._compareBoth(y, self._inv, math_ops.reciprocal) + self._compareBoth(x, np.square, math_ops.square) + self._compareBoth(z, np.sqrt, math_ops.sqrt) + self._compareBoth(z, self._rsqrt, math_ops.rsqrt) + self._compareBoth(x, np.exp, math_ops.exp) + self._compareBoth(x, np.expm1, math_ops.expm1) + self._compareBoth(z, np.log, math_ops.log) + self._compareBoth(z, np.log1p, math_ops.log1p) + self._compareBoth(x, np.tanh, math_ops.tanh) + self._compareBoth(x, self._sigmoid, math_ops.sigmoid) + self._compareBoth(y, np.sign, math_ops.sign) + self._compareBoth(x, np.sin, math_ops.sin) + self._compareBoth(x, np.cos, math_ops.cos) + self._compareBoth( + y, np.vectorize(self._replace_domain_error_with_inf(math.lgamma)), + math_ops.lgamma) + self._compareBoth(x, np.vectorize(math.erf), math_ops.erf) + self._compareBoth(x, np.vectorize(math.erfc), math_ops.erfc) + try: + from scipy import special # pylint: disable=g-import-not-at-top + self._compareBoth(x, special.i0e, math_ops.bessel_i0e) + self._compareBoth(x, special.i1e, math_ops.bessel_i1e) + except ImportError as e: + tf_logging.warn("Cannot test special functions: %s" % str(e)) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + 
self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(z, np.sqrt, math_ops.sqrt, tol=1e-3) + self._compareBothSparse(x, np.tanh, math_ops.tanh) + self._compareBothSparse(y, np.sign, math_ops.sign) + self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3) + + def testInt32Basic(self): + x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int32) + self._compareCpu(x, np.abs, math_ops.abs) + self._compareCpu(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, np.negative, _NEG) + self._compareBoth(x, np.square, math_ops.square) + self._compareCpu(x, np.sign, math_ops.sign) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(x, np.sign, math_ops.sign) + + def testInt64Basic(self): + x = np.arange(-6 << 40, 6 << 40, 2 << 40).reshape(1, 3, 2).astype(np.int64) + self._compareCpu(x, np.abs, math_ops.abs) + self._compareCpu(x, np.abs, _ABS) + self._compareCpu(x, np.negative, math_ops.negative) + self._compareCpu(x, np.negative, _NEG) + self._compareCpu(x, np.sign, math_ops.sign) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.sign, math_ops.sign) + + def testInt64Square(self): + x = np.arange(-6 << 20, 6 << 20, 2 << 20).reshape(1, 3, 2).astype(np.int64) + self._compareCpu(x, np.square, math_ops.square) + self._compareBothSparse(x, np.square, math_ops.square) + + def testComplex64Basic(self): + x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype( + np.complex64) + y = x + np.complex(0.5, 0.5) # no zeros + self._compareBoth(x, np.abs, math_ops.abs) + self._compareBoth(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, np.negative, _NEG) + self._compareCpu(y, self._inv, math_ops.reciprocal) + 
self._compareCpu(x, np.square, math_ops.square) + self._compareCpu(y, np.sqrt, math_ops.sqrt) + self._compareCpu(y, self._rsqrt, math_ops.rsqrt) + self._compareBoth(x, np.exp, math_ops.exp) + self._compareCpu(x, np.expm1, math_ops.expm1) + self._compareCpu(y, np.log, math_ops.log) + self._compareCpu(y, np.log1p, math_ops.log1p) + self._compareCpu(x, np.sinh, math_ops.sinh) + self._compareCpu(x, np.cosh, math_ops.cosh) + self._compareCpu(x, np.tanh, math_ops.tanh) + + # Complex64 versions of asinh() and acosh() in libstdc++ only have 6 digits + # of precision. + # Small gradient values + low precision --> High relative error + self._compareCpu(y, np.arcsinh, math_ops.asinh, grad_rtol=1e-2) + self._compareCpu(y, np.arccosh, math_ops.acosh, grad_rtol=1e-2) + + self._compareCpu(y, np.arctanh, math_ops.atanh) + self._compareCpu(x, self._sigmoid, math_ops.sigmoid) + self._compareCpu(x, np.sin, math_ops.sin) + self._compareCpu(x, np.cos, math_ops.cos) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3) + self._compareBothSparse(x, np.tanh, math_ops.tanh) + + # Numpy uses an incorrect definition of sign; use the right one instead. 
+ def complex_sign(x): + return x / np.abs(x) + + self._compareBoth(y, complex_sign, math_ops.sign) + self._compareBothSparse(y, complex_sign, math_ops.sign) + + def testComplex128Basic(self): + x = np.complex(1, 1) * np.arange(-3, 3).reshape(1, 3, 2).astype( + np.complex128) + y = x + np.complex(0.5, 0.5) # no zeros + self._compareBoth(x, np.abs, math_ops.abs) + self._compareBoth(x, np.abs, _ABS) + self._compareBoth(x, np.negative, math_ops.negative) + self._compareBoth(x, np.negative, _NEG) + self._compareCpu(y, self._inv, math_ops.reciprocal) + self._compareCpu(x, np.square, math_ops.square) + self._compareCpu(y, np.sqrt, math_ops.sqrt) + self._compareCpu(y, self._rsqrt, math_ops.rsqrt) + self._compareBoth(x, np.exp, math_ops.exp) + self._compareCpu(x, np.expm1, math_ops.expm1) + self._compareCpu(y, np.log, math_ops.log) + self._compareCpu(y, np.log1p, math_ops.log1p) + self._compareCpu(x, np.sinh, math_ops.sinh) + self._compareCpu(x, np.cosh, math_ops.cosh) + self._compareCpu(x, np.tanh, math_ops.tanh) + self._compareCpu(y, np.arcsinh, math_ops.asinh) + self._compareCpu(y, np.arccosh, math_ops.acosh) + self._compareCpu(y, np.arctanh, math_ops.atanh) + self._compareCpu(x, self._sigmoid, math_ops.sigmoid) + self._compareCpu(x, np.sin, math_ops.sin) + self._compareCpu(x, np.cos, math_ops.cos) + + self._compareBothSparse(x, np.abs, math_ops.abs) + self._compareBothSparse(x, np.negative, math_ops.negative) + self._compareBothSparse(x, np.square, math_ops.square) + self._compareBothSparse(x, np.sqrt, math_ops.sqrt, 1e-3) + self._compareBothSparse(x, np.tanh, math_ops.tanh) + + # Numpy uses an incorrect definition of sign; use the right one instead. 
+ def complex_sign(x): + return x / np.abs(x) + + self._compareBoth(y, complex_sign, math_ops.sign) + self._compareBothSparse(y, complex_sign, math_ops.sign) + + def testGradGrad(self): + np.random.seed(7) + shape = (5,) + dtype_tols = [(np.float32, 5e-4), (np.float64, 1e-6), (np.complex64, 5e-4), + (np.complex128, 1e-6)] + op_range = [ + (gen_math_ops.reciprocal_grad, [-2, 2]), + (gen_math_ops.rsqrt_grad, [0.1, 3]), + (gen_math_ops.sigmoid_grad, [-2, 2]), + (gen_math_ops.sqrt_grad, [0.1, 3]), + (gen_math_ops.tanh_grad, [-2, 2]), + ] + + def rand(dtype, real_range): + x = np.random.uniform( + real_range[0], real_range[1], size=shape[0]).astype(dtype) + if dtype in (np.complex64, np.complex128): + x += 1j * np.random.uniform(-2, 2, size=shape[0]).astype(dtype) + return x + + for op, real_range in op_range: + with self.cached_session(): + for dtype, tol in dtype_tols: + x = constant_op.constant(rand(dtype, real_range)) + y = constant_op.constant(rand(dtype, real_range)) + z = op(x, y) + grads = gradient_checker.compute_gradient( + [x, y], [shape, shape], + z, + shape, + x_init_value=[rand(dtype, real_range), + rand(dtype, real_range)]) + if isinstance(grads, tuple): + grads = [grads] + for analytical, numerical in grads: + self.assertAllClose(analytical, numerical, rtol=tol, atol=tol) + + +if __name__ == "__main__": + test.main() -- GitLab From 1bd2804869355a7cd0cbfbe9e6aab7591b8a20de Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Mon, 17 Sep 2018 18:54:34 -0700 Subject: [PATCH 0311/1357] Add Keras TPU support for the new metrics. 
PiperOrigin-RevId: 213378552 --- .../contrib/tpu/python/tpu/keras_support.py | 15 ++++++- tensorflow/python/keras/engine/saving_test.py | 7 ++++ tensorflow/python/keras/metrics.py | 42 +++++++++++++------ 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py index 776b9bff0f..bf445256b6 100644 --- a/tensorflow/contrib/tpu/python/tpu/keras_support.py +++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py @@ -76,6 +76,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import backend as K from tensorflow.python.keras import callbacks as cbks +from tensorflow.python.keras import metrics as metrics_module from tensorflow.python.keras import models from tensorflow.python.keras import optimizers as keras_optimizers from tensorflow.python.keras.engine import base_layer @@ -293,6 +294,16 @@ def _replicated_optimizer(opt): return KerasCrossShardOptimizer(opt) +def clone_metrics(metrics): + """Returns a copy of metrics. A copy is created for stateful metrics.""" + if metrics is None: + return None + return [ + m.__class__.from_config(m.get_config()) + if isinstance(m, metrics_module.Metric) else m for m in metrics + ] + + class TPURewriteContext(object): """Prepare the environment for a Keras model during `tpu.rewrite`. 
@@ -811,8 +822,8 @@ class TPUFunction(object): optimizer=_replicated_optimizer(cloned_optimizer), loss=self.model.loss, loss_weights=self.model.loss_weights, - metrics=self.model.metrics, - weighted_metrics=self.model.weighted_metrics, + metrics=clone_metrics(self.model.metrics), + weighted_metrics=clone_metrics(self.model.weighted_metrics), target_tensors=tpu_targets, ) diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 148dd23be7..02d99d5d69 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -370,6 +370,13 @@ class TestWholeModelSaving(test.TestCase): y = np.random.random((1, 3, 3)) model.train_on_batch(x, y) new_model.train_on_batch(x, y) + + x = np.random.random((1, 3)) + y = np.random.random((1, 3, 3)) + eval_out = model.evaluate(x, y) + eval_out2 = new_model.evaluate(x, y) + self.assertArrayNear(eval_out, eval_out2, 0.001) + out = model.predict(x) out2 = new_model.predict(x) self.assertAllClose(out, out2, atol=1e-05) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index fd3c39cf2e..e64241e5cf 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -199,7 +199,6 @@ def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight): # squeeze last dim of `y_pred` or `y_true` if their rank differs by 1 y_true, y_pred = confusion_matrix.remove_squeezable_dimensions( y_true, y_pred) - y_pred.get_shape().assert_is_compatible_with(y_true.get_shape()) if sample_weight is None: return y_pred, y_true, None @@ -342,19 +341,14 @@ class Metric(Layer): # weak reference. This is to remove reference cycle that is created here. # This is not an issue in python versions > 3. 
if context.executing_eagerly(): - update_state = weakmethod(obj.update_state) - else: - update_state = function.defun(obj.update_state) + obj.update_state = weakmethod(obj.update_state) obj.update_state = weakmethod( - types.MethodType(update_state_wrapper(update_state), obj)) + types.MethodType(update_state_wrapper(obj.update_state), obj)) result = weakmethod(obj.result) obj.result = weakmethod(types.MethodType(result_wrapper(result), obj)) else: - # Converting update_state_fn() into a graph function, so that - # we can return a single op that performs all of the variable updates. - defuned_update_state_fn = function.defun(obj.update_state) obj.update_state = types.MethodType( - update_state_wrapper(defuned_update_state_fn), obj) + update_state_wrapper(obj.update_state), obj) obj.result = types.MethodType(result_wrapper(obj.result), obj) return obj @@ -475,6 +469,9 @@ class Mean(Metric): Args: values: Per-example value. sample_weight: Optional weighting of each example. Defaults to 1. + + Returns: + Update op. """ values = math_ops.cast(values, self._dtype) if sample_weight is None: @@ -501,8 +498,9 @@ class Mean(Metric): values = math_ops.reduce_sum(values) # Update state variables - state_ops.assign_add(self.total, values) - state_ops.assign_add(self.count, num_values) + update_total_op = state_ops.assign_add(self.total, values) + update_count_op = state_ops.assign_add(self.count, num_values) + return control_flow_ops.group(update_total_op, update_count_op) def result(self): return safe_div(self.total, self.count) @@ -536,6 +534,9 @@ class MeanMetricWrapper(Mean): sample_weight: Optional weighting of each example. Defaults to 1. Can be a `Tensor` whose rank is either 0, or the same rank as `y_true`, and must be broadcastable to `y_true`. + + Returns: + Update op. 
""" y_true = math_ops.cast(y_true, self._dtype) y_pred = math_ops.cast(y_pred, self._dtype) @@ -543,7 +544,7 @@ class MeanMetricWrapper(Mean): y_pred, y_true, sample_weight) matches = self._fn(y_true, y_pred, **self._fn_kwargs) - super(MeanMetricWrapper, self).update_state( + return super(MeanMetricWrapper, self).update_state( matches, sample_weight=sample_weight) def get_config(self): @@ -600,6 +601,23 @@ class CategoricalAccuracy(MeanMetricWrapper): categorical_accuracy, name, dtype=dtype) +class SparseCategoricalAccuracy(MeanMetricWrapper): + """Calculates how often predictions matches integer labels. + + This metric creates two local variables, `total` and `count` that are used to + compute the frequency with which `y_pred` matches `y_true`. This frequency is + ultimately returned as `sparse categorical accuracy`: an idempotent operation + that simply divides `total` by `count`. + + If `sample_weight` is `None`, weights default to 1. + Use `sample_weight` of 0 to mask values. + """ + + def __init__(self, name='sparse_categorical_accuracy', dtype=None): + super(SparseCategoricalAccuracy, self).__init__( + sparse_categorical_accuracy, name, dtype=dtype) + + @tf_export('keras.metrics.binary_accuracy') def binary_accuracy(y_true, y_pred, threshold=0.5): threshold = math_ops.cast(threshold, y_pred.dtype) -- GitLab From 2cb119b81fd08a1e680a2b44ff68c0a8c76eb017 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Tue, 18 Sep 2018 10:40:54 +0800 Subject: [PATCH 0312/1357] [tflite] fix calculating of output pixels fix an issue reported by issue #22310 --- .../contrib/lite/examples/label_image/bitmap_helpers_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h index 6fdcf78b69..7e09d4bc79 100644 --- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h +++ 
b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h @@ -81,7 +81,7 @@ void resize(T* out, uint8_t* in, int image_height, int image_width, auto output = interpreter->typed_tensor(2); auto output_number_of_pixels = - wanted_height * wanted_height * wanted_channels; + wanted_height * wanted_width * wanted_channels; for (int i = 0; i < output_number_of_pixels; i++) { if (s->input_floating) -- GitLab From bb9958ab69a38cbe57d119947b635a257fa6b802 Mon Sep 17 00:00:00 2001 From: James Qin Date: Mon, 17 Sep 2018 19:56:41 -0700 Subject: [PATCH 0313/1357] Register fp16 reduce_max on GPU PiperOrigin-RevId: 213383647 --- tensorflow/core/kernels/reduction_ops_max.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/reduction_ops_max.cc b/tensorflow/core/kernels/reduction_ops_max.cc index 9cf953f4bf..8bfa44b2d0 100644 --- a/tensorflow/core/kernels/reduction_ops_max.cc +++ b/tensorflow/core/kernels/reduction_ops_max.cc @@ -50,6 +50,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS); .TypeConstraint("Tidx") \ .HostMemory("reduction_indices"), \ ReductionOp>); + +REGISTER_GPU_KERNELS(Eigen::half); REGISTER_GPU_KERNELS(float); REGISTER_GPU_KERNELS(double); REGISTER_GPU_KERNELS(int64); -- GitLab From 0b7125d3c5e7128470a7a74cf8a3543eab39c2d8 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 17 Sep 2018 20:25:23 -0700 Subject: [PATCH 0314/1357] Fix unused variable error on powerpc. 
PiperOrigin-RevId: 213386145 --- .../internal/optimized/depthwiseconv_float.h | 6 ++---- .../internal/optimized/depthwiseconv_uint8.h | 14 ++++++-------- .../optimized/depthwiseconv_uint8_3x3_filter.h | 8 +++++--- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index f0bea7fa1d..114575a96a 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -932,9 +932,6 @@ inline void DepthwiseConv( TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const bool has_dilation = (params.dilation_width_factor != 1) || - (params.dilation_height_factor != 1); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); const int input_height = input_shape.Dims(1); @@ -966,7 +963,8 @@ inline void DepthwiseConv( FIXED_DEPTH_MULTIPLIER) \ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ - depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) { \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER && \ + dilation_height_factor == 1 && dilation_width_factor == 1) { \ row_accum_func = \ FloatDepthwiseConvAccumRow; \ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 494cf70504..ee3fe78a10 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1718,18 +1718,15 @@ inline void DepthwiseConv( TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); 
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - const bool has_dilation = - (dilation_width_factor != 1) || (dilation_height_factor != 1); - // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. - if (Fast3x3FilterKernelSupported(input_shape, filter_shape, stride_width, - stride_height, has_dilation, pad_width, - pad_height, depth_multiplier, output_shape, - output_shift)) { + if (Fast3x3FilterKernelSupported( + input_shape, filter_shape, stride_width, stride_height, + dilation_width_factor, dilation_height_factor, pad_width, pad_height, + depth_multiplier, output_shape, output_shift)) { DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data); @@ -1756,7 +1753,8 @@ inline void DepthwiseConv( FIXED_DEPTH_MULTIPLIER) \ if (!row_accum_func && (stride_width == 1 || ALLOW_STRIDED) && \ (input_depth == FIXED_INPUT_DEPTH || FIXED_INPUT_DEPTH == 0) && \ - depth_multiplier == FIXED_DEPTH_MULTIPLIER && !has_dilation) { \ + depth_multiplier == FIXED_DEPTH_MULTIPLIER && \ + dilation_width_factor == 1 && dilation_height_factor == 1) { \ row_accum_func = \ QuantizedDepthwiseConvAccumRow; \ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 5087227182..e14d04ad02 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -3176,8 +3176,9 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data, inline bool Fast3x3FilterKernelSupported( const RuntimeShape& input_shape, const RuntimeShape& 
filter_shape, - int32 stride_width, int32 stride_height, bool has_dilation, int32 pad_width, - int32 pad_height, int32 depth_multiplier, const RuntimeShape& output_shape, + int32 stride_width, int32 stride_height, int32 dilation_width_factor, + int32 dilation_height_factor, int32 pad_width, int32 pad_height, + int32 depth_multiplier, const RuntimeShape& output_shape, int32 output_shift) { const int32 input_height = input_shape.Dims(1); const int32 input_width = input_shape.Dims(2); @@ -3193,7 +3194,8 @@ inline bool Fast3x3FilterKernelSupported( (stride_height == 1 || stride_height == 2) && (stride_width == stride_height) && (pad_width == 0 || pad_width == 1) && (pad_height == 0 || pad_height == 1) && (pad_width == pad_height) && - (input_depth % 8) == 0 && (output_shift > 0) && !has_dilation; + (input_depth % 8) == 0 && (output_shift > 0) && + dilation_width_factor == 1 && dilation_height_factor == 1; if (!supported) { return false; -- GitLab From eeb477cf661a16ee39e0621fd225d1f15859ffc8 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 17 Sep 2018 20:28:59 -0700 Subject: [PATCH 0315/1357] [tf.data] Fixing an error in the optimization loop. PiperOrigin-RevId: 213386401 --- tensorflow/core/framework/model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index b3fe357ea1..112298c344 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -322,7 +322,7 @@ void Model::Optimize(int64 cpu_budget) { } tunable->value--; } - if (best_tunable) { + if (!best_tunable) { // NOTE: This can happen because we are performing the optimization // while the model data is changing. If this becomes an issue, we should // look into performing the optimization using a model snapshot. 
-- GitLab From b91e27a9c33d038af79a0944eb9046b926d483c8 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 17 Sep 2018 21:01:19 -0700 Subject: [PATCH 0316/1357] Refactor out the metadata_ops set from const_analysis to a per-op bit; NFC PiperOrigin-RevId: 213389224 --- tensorflow/compiler/tf2xla/const_analysis.cc | 12 +++------- .../compiler/tf2xla/kernels/shape_op.cc | 8 +++---- tensorflow/compiler/tf2xla/xla_op_registry.cc | 24 +++++++++++++++++++ tensorflow/compiler/tf2xla/xla_op_registry.h | 12 ++++++++++ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 922ae7c79a..027ca6d2d2 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -29,14 +29,6 @@ Status BackwardsConstAnalysis(const Graph& g, std::vector* compile_time_const_arg_indices, std::vector* compile_time_const_nodes, std::function edge_filter) { - // Operators that don't look at the data of their inputs, just the shapes. - const std::unordered_set metadata_ops = { - "Rank", - "Shape", - "ShapeN", - "Size", - }; - std::vector compile_time_const_nodes_impl; if (compile_time_const_nodes) { CHECK_EQ(compile_time_const_nodes->size(), g.num_node_ids()); @@ -50,7 +42,9 @@ Status BackwardsConstAnalysis(const Graph& g, if (!status.ok()) return; // If this is a metadata-only op, don't propagate the const requirement. - if (metadata_ops.find(node->type_string()) != metadata_ops.end()) return; + if (XlaOpRegistry::IsMetadataOp(node->type_string())) { + return; + } // If this node must be const, and it isn't a metadata op, then all of its // parents must be const. 
diff --git a/tensorflow/compiler/tf2xla/kernels/shape_op.cc b/tensorflow/compiler/tf2xla/kernels/shape_op.cc index 2e0a69b70e..c8a0f31a03 100644 --- a/tensorflow/compiler/tf2xla/kernels/shape_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/shape_op.cc @@ -44,7 +44,7 @@ class ShapeOp : public XlaOpKernel { DataType out_dtype_; }; -REGISTER_XLA_OP(Name("Shape").CompilationOnly(), ShapeOp); +REGISTER_XLA_OP(Name("Shape").CompilationOnly().IsMetadataOp(), ShapeOp); class ShapeNOp : public XlaOpKernel { public: @@ -66,7 +66,7 @@ class ShapeNOp : public XlaOpKernel { private: DataType out_dtype_; }; -REGISTER_XLA_OP(Name("ShapeN").CompilationOnly(), ShapeNOp); +REGISTER_XLA_OP(Name("ShapeN").CompilationOnly().IsMetadataOp(), ShapeNOp); class RankOp : public XlaOpKernel { public: @@ -82,7 +82,7 @@ class RankOp : public XlaOpKernel { } }; -REGISTER_XLA_OP(Name("Rank").CompilationOnly(), RankOp); +REGISTER_XLA_OP(Name("Rank").CompilationOnly().IsMetadataOp(), RankOp); class SizeOp : public XlaOpKernel { public: @@ -101,7 +101,7 @@ class SizeOp : public XlaOpKernel { } }; -REGISTER_XLA_OP(Name("Size").CompilationOnly(), SizeOp); +REGISTER_XLA_OP(Name("Size").CompilationOnly().IsMetadataOp(), SizeOp); class ExpandDimsOp : public XlaOpKernel { public: diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc index b0eeee3174..91d48125f1 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.cc +++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc @@ -90,6 +90,11 @@ XlaOpRegistry::~XlaOpRegistry() = default; << " have incompatible compile time constant inputs."; return false; } + if (x.is_metadata_op != y.is_metadata_op) { + LOG(WARNING) << "Registrations of " << x.name + << " have incompatible values for is_metadata_op."; + return false; + } return true; } @@ -350,6 +355,20 @@ XlaOpRegistry::CompileTimeConstantInputs(const string& op) { return &it->second.front()->compile_time_constant_inputs; } +/*static*/ bool 
XlaOpRegistry::IsMetadataOp(const string& op) { + XlaOpRegistry& registry = Instance(); + mutex_lock lock(registry.mutex_); + auto it = registry.ops_.find(op); + if (it == registry.ops_.end() || it->second.empty()) { + return false; + } + + // The test in IsCompatible ensures that if there are multiple matching + // registrations for this op name, they all have the same value of + // is_metadata_op, so only the first match is returned. + return it->second.front()->is_metadata_op; +} + std::vector XlaOpRegistry::BackendNames() { std::vector names; XlaOpRegistry& registry = Instance(); @@ -432,6 +451,11 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::CompileTimeConstInput( return *this; } +XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::IsMetadataOp() { + registration_->is_metadata_op = true; + return *this; +} + std::unique_ptr XlaOpRegistrationBuilder::Build( XlaOpRegistry::Factory factory) { registration_->factory = factory; diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h index 34e22a4510..a4b624820a 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.h +++ b/tensorflow/compiler/tf2xla/xla_op_registry.h @@ -136,6 +136,10 @@ class XlaOpRegistry { static const std::unordered_set* CompileTimeConstantInputs( const string& op); + // Returns true if `op` is a "metadata" op, one that only looks at the shapes + // of its operands and not their values. + static bool IsMetadataOp(const string& op); + private: friend class XlaBackendRegistrar; friend class XlaOpRegistrar; @@ -192,6 +196,10 @@ class XlaOpRegistry { // Names of arguments that must be compile-time constants. std::unordered_set compile_time_constant_inputs; + // True if this is a "metadata" op, one that only looks at the shapes of its + // operands and not their values. + bool is_metadata_op = false; + // Factory used to build OpKernels that perform symbolic execution. 
Factory factory; }; @@ -256,6 +264,10 @@ class XlaOpRegistrationBuilder { // Mark 'input_name' as an argument whose value must be known at compile-time. XlaOpRegistrationBuilder& CompileTimeConstInput(absl::string_view input_name); + // Mark this op as a "metadata" op, one that only looks at the shapes of its + // operands and not their values. + XlaOpRegistrationBuilder& IsMetadataOp(); + std::unique_ptr Build( XlaOpRegistry::Factory factory); -- GitLab From cc3a7a847f0c73ae3de99f6b56ef02f56644ea67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 18 Sep 2018 13:06:51 +0800 Subject: [PATCH 0317/1357] CLN: minor changes --- tensorflow/python/estimator/canned/boosted_trees.py | 3 ++- tensorflow/python/keras/layers/advanced_activations.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 7c04ff7970..f2e7b37f7f 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -1084,7 +1084,8 @@ class _BoostedTrees(estimator.Estimator): Raises: ValueError: When attempting to normalize on an empty ensemble - or an ensemble of trees which have no splits. + or an ensemble of trees which have no splits. Or when attempting + to normalize and feature importances have negative values. 
""" reader = checkpoint_utils.load_checkpoint(self._model_dir) serialized = reader.get_tensor('boosted_trees:0_serialized') diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py index 6922d3ec1e..61ab69c16f 100644 --- a/tensorflow/python/keras/layers/advanced_activations.py +++ b/tensorflow/python/keras/layers/advanced_activations.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.keras import activations from tensorflow.python.keras import backend as K from tensorflow.python.keras import constraints from tensorflow.python.keras import initializers -- GitLab From 7c826588b058c14fd8c152bedb4e256c57ae1248 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 17 Sep 2018 22:09:02 -0700 Subject: [PATCH 0318/1357] Automated rollback of commit 185aa89912376d4088c22615908696cd30f9951b PiperOrigin-RevId: 213394522 --- tensorflow/contrib/gdr/gdr_memory_manager.cc | 102 ++++++----- tensorflow/contrib/verbs/rdma_mgr.cc | 81 ++++++--- tensorflow/contrib/verbs/rdma_mgr.h | 1 - tensorflow/contrib/verbs/verbs_server_lib.cc | 5 - tensorflow/core/BUILD | 1 + .../core/common_runtime/bfc_allocator.cc | 21 ++- .../core/common_runtime/bfc_allocator.h | 14 +- .../common_runtime/gpu/cuda_host_allocator.h | 12 +- .../common_runtime/gpu/gpu_bfc_allocator.cc | 17 +- .../common_runtime/gpu/gpu_bfc_allocator.h | 44 ++--- .../gpu/gpu_bfc_allocator_test.cc | 90 ++-------- .../gpu/gpu_cudamalloc_allocator.cc | 10 +- .../gpu/gpu_cudamalloc_allocator.h | 11 +- .../common_runtime/gpu/gpu_debug_allocator.cc | 20 ++- .../common_runtime/gpu/gpu_debug_allocator.h | 20 ++- .../gpu/gpu_debug_allocator_test.cc | 35 +--- .../core/common_runtime/gpu/gpu_device.cc | 64 +++---- .../core/common_runtime/gpu/gpu_device.h | 9 +- .../common_runtime/gpu/gpu_process_state.cc | 161 +++++++----------- 
.../common_runtime/gpu/gpu_process_state.h | 58 +++---- .../common_runtime/gpu/pool_allocator_test.cc | 68 ++------ .../core/common_runtime/mkl_cpu_allocator.h | 50 +++++- .../core/common_runtime/pool_allocator.cc | 45 +++-- .../core/common_runtime/pool_allocator.h | 27 ++- .../core/common_runtime/process_state.cc | 71 ++------ .../core/common_runtime/process_state.h | 15 +- .../core/common_runtime/renamed_device.h | 7 +- .../core/common_runtime/visitable_allocator.h | 79 +++++++++ tensorflow/core/framework/allocator.cc | 20 +-- tensorflow/core/framework/allocator.h | 28 +-- tensorflow/core/framework/device_base.h | 10 +- tensorflow/core/framework/op_kernel.cc | 9 +- 32 files changed, 577 insertions(+), 628 deletions(-) create mode 100644 tensorflow/core/common_runtime/visitable_allocator.h diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index bb06f1c41c..726f74c7b7 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -138,8 +138,6 @@ class GdrMemoryManager : public RemoteMemoryManager { Device* device, DeviceContext* device_context, bool on_host, StatusCallback done) override; - static void RegMemVisitors(); - protected: Status CreateEndpoint(const string& host, const string& port, RdmaEndpointPtr& endpoint); @@ -185,51 +183,35 @@ class GdrMemoryManager : public RemoteMemoryManager { TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager); }; +// TODO(byronyi): remove this class and its registration when the default +// cpu_allocator() returns visitable allocator, or cpu_allocator() is no +// longer in use. 
+class BFCGdrAllocator : public BFCAllocator { + public: + BFCGdrAllocator() + : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, + true, "cpu_gdr_bfc") {} +}; +class BFCGdrAllocatorFactory : public AllocatorFactory { + public: + Allocator* CreateAllocator() override { return new BFCGdrAllocator; } + + virtual SubAllocator* CreateSubAllocator(int numa_node) { + return new BasicCPUAllocator(numa_node); + } +}; + +REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 102, BFCGdrAllocatorFactory); + GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) : host_(host), port_(port), listening_(nullptr, EndpointDeleter), stopped_(true), - next_key_(0) { - static std::once_flag flag; - std::call_once(flag, []() { RegMemVisitors(); }); -} + next_key_(0) {} GdrMemoryManager::~GdrMemoryManager() { close(epfd_); } -/*static*/ void GdrMemoryManager::RegMemVisitors() { - SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - GdrMemoryManager::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("CPU:", numa_node)); - }; - SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes); - }; - ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); - ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); - -#if GOOGLE_CUDA - if (IsGDRAvailable()) { - int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - - // Note we don't free allocated GPU memory so there is no free visitor - SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); - }; - GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, - cuda_alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, - alloc_visitor); - 
GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); - LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; - } -#endif // GOOGLE_CUDA -} - Status GdrMemoryManager::Init() { epfd_ = epoll_create1(0); if (epfd_ == -1) { @@ -289,6 +271,48 @@ Status GdrMemoryManager::Init() { "cannot add server to epoll"); } + Allocator* allocators[] = { +#if GOOGLE_CUDA + GPUProcessState::singleton()->GetCUDAHostAllocator(0), +#endif // GOOGLE_CUDA + ProcessState::singleton()->GetCPUAllocator(0), + cpu_allocator(), + }; + + using namespace std::placeholders; + VisitableAllocator::Visitor alloc_visitor = + std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); + VisitableAllocator::Visitor free_visitor = + std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2); + + std::set instrumented_; + + // Host memory allocators + for (Allocator* allocator : allocators) { + auto* visitable_allocator = dynamic_cast(allocator); + CHECK(visitable_allocator) + << "is not visitable for instrumentation" << allocator->Name(); + // Make sure we don't instrument the same allocator twice + if (instrumented_.find(allocator) == std::end(instrumented_)) { + visitable_allocator->AddAllocVisitor(alloc_visitor); + visitable_allocator->AddFreeVisitor(free_visitor); + instrumented_.insert(allocator); + LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); + } + } + +#if GOOGLE_CUDA + VisitableAllocator::Visitor cuda_alloc_visitor = + std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); + if (IsGDRAvailable()) { + // Note we don't free allocated GPU memory so there is no free visitor + int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1; + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); + LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; + } +#endif // GOOGLE_CUDA + return Status::OK(); } diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc 
index 2784bf124c..3cb5e61fac 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "tensorflow/contrib/verbs/grpc_verbs_client.h" #include "tensorflow/contrib/verbs/verbs_service.pb.h" +#include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/pool_allocator.h" @@ -28,7 +29,6 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { @@ -256,41 +256,74 @@ void MRDeleter(ibv_mr* mr) { } } +// TODO(byronyi): remove this class and its registration when the default +// cpu_allocator() returns visitable allocator, or cpu_allocator() is no +// longer in use. 
+class BFCRdmaAllocator : public BFCAllocator { + public: + BFCRdmaAllocator() + : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, + true, "cpu_rdma_bfc") {} +}; +class BFCRdmaAllocatorFactory : public AllocatorFactory { + public: + Allocator* CreateAllocator() { return new BFCRdmaAllocator; } + + SubAllocator* CreateSubAllocator(int numa_node) { + return new BasicCPUAllocator(numa_node); + } +}; + +REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory); + void RdmaMgr::InitAllocators() { - static std::once_flag flag; - std::call_once( - flag, [this]() { RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; }); -} + RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; -/*static*/ void RdmaMgr::RegMemVisitors() { - SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("CPU:", numa_node)); - }; - SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().EvictMemoryRegion(ptr, num_bytes); + Allocator* allocators[] = { +#if GOOGLE_CUDA + GPUProcessState::singleton()->GetCUDAHostAllocator(0), +#endif // GOOGLE_CUDA + ProcessState::singleton()->GetCPUAllocator(0), + cpu_allocator(), }; - ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); - ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); + using namespace std::placeholders; + + std::set instrumented_; + + // Host memory allocators + for (Allocator* allocator : allocators) { + VisitableAllocator::Visitor alloc_visitor = + std::bind(&RdmaMemoryMgr::InsertMemoryRegion, + &RdmaMemoryMgr::Singleton(), _1, _2, allocator->Name()); + VisitableAllocator::Visitor free_visitor = std::bind( + &RdmaMemoryMgr::EvictMemoryRegion, &RdmaMemoryMgr::Singleton(), _1, _2); + + auto* visitable_allocator = dynamic_cast(allocator); + CHECK(visitable_allocator) + << "is not visitable for instrumentation" << 
allocator->Name(); + // Make sure we don't instrument the same allocator twice + if (instrumented_.find(allocator) == std::end(instrumented_)) { + visitable_allocator->AddAllocVisitor(alloc_visitor); + visitable_allocator->AddFreeVisitor(free_visitor); + instrumented_.insert(allocator); + LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); + } + } #if GOOGLE_CUDA if (IsGDRAvailable()) { // Note we don't free allocated GPU memory so there is no free visitor int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); - }; + char buf[8]; + sprintf(buf, "gpu"); + VisitableAllocator::Visitor cuda_alloc_visitor = + std::bind(&RdmaMemoryMgr::InsertMemoryRegion, + &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf)); + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, - alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; } #endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index 74b92cc9a6..9fffc335bb 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -39,7 +39,6 @@ class RdmaMgr { void SetupChannels(); bool ConnectivityCheck(); void InitAllocators(); - static void RegMemVisitors(); const string& local_worker() { return local_worker_; } private: diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc index 61469686e4..1a0b5028fe 100644 --- a/tensorflow/contrib/verbs/verbs_server_lib.cc +++ b/tensorflow/contrib/verbs/verbs_server_lib.cc @@ -76,13 +76,8 @@ Status VerbsServer::ChannelCacheFactory(const ServerDef& 
server_def, return Status::OK(); } -namespace { -std::once_call reg_mem_visitors_call; -} // namespace - Status VerbsServer::Init(ServiceInitFunction service_func, RendezvousMgrCreationFunction rendezvous_mgr_func) { - std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); }); Status s = GrpcServer::Init(service_func, rendezvous_mgr_func); { mutex_lock l(mu_); diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9bcf5b0865..d55bd8d7ed 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2783,6 +2783,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", "common_runtime/tracing_device.h", + "common_runtime/visitable_allocator.h", "common_runtime/process_state.h", "common_runtime/pool_allocator.h", "graph/gradients.h", diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 3843ea9e60..84c6285bbe 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -31,7 +31,7 @@ namespace tensorflow { BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, bool allow_growth, const string& name) - : sub_allocator_(sub_allocator), + : suballocator_(sub_allocator), name_(name), free_chunks_list_(kInvalidChunkHandle), next_allocation_id_(1) { @@ -72,7 +72,7 @@ BFCAllocator::~BFCAllocator() { VLOG(2) << "Number of regions allocated: " << region_manager_.regions().size(); for (const auto& region : region_manager_.regions()) { - sub_allocator_->Free(region.ptr(), region.memory_size()); + suballocator_->Free(region.ptr(), region.memory_size()); } for (BinNum b = 0; b < kNumBins; b++) { @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Try allocating. 
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = sub_allocator_->Alloc(alignment, bytes); + void* mem_addr = suballocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = sub_allocator_->Alloc(alignment, bytes); + mem_addr = suballocator_->Alloc(alignment, bytes); } } @@ -158,6 +158,10 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Insert the chunk into the right bin. InsertFreeChunkIntoBin(h); + // Invoke visitors on newly allocated region. + for (const auto& visitor : region_visitors_) { + visitor(mem_addr, bytes); + } return true; } @@ -486,6 +490,15 @@ void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) { InsertFreeChunkIntoBin(coalesced_chunk); } +void BFCAllocator::AddAllocVisitor(Visitor visitor) { + VLOG(1) << "AddVisitor"; + mutex_lock l(lock_); + region_visitors_.push_back(visitor); + for (const auto& region : region_manager_.regions()) { + visitor(region.ptr(), region.memory_size()); + } +} + bool BFCAllocator::TracksAllocationSizes() { return true; } size_t BFCAllocator::RequestedSize(const void* ptr) { diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index 364071e066..20e1dab1d5 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -23,7 +23,7 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/allocator_retry.h" -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/macros.h" @@ -42,7 +42,7 @@ namespace tensorflow { // coalescing. One assumption we make is that the process using this // allocator owns pretty much all of the memory, and that nearly // all requests to allocate memory go through this interface. -class BFCAllocator : public Allocator { +class BFCAllocator : public VisitableAllocator { public: // Takes ownership of sub_allocator. BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, @@ -55,6 +55,11 @@ class BFCAllocator : public Allocator { const AllocationAttributes& allocation_attr) override; void DeallocateRaw(void* ptr) override; + void AddAllocVisitor(Visitor visitor) override; + + // Does nothing, because memory is never freed. + void AddFreeVisitor(Visitor visitor) override {} + bool TracksAllocationSizes() override; size_t RequestedSize(const void* ptr) override; @@ -418,7 +423,7 @@ class BFCAllocator : public Allocator { // of the available memory. bool started_backpedal_ = false; - std::unique_ptr sub_allocator_; + std::unique_ptr suballocator_; string name_; // Structures mutable after construction @@ -430,6 +435,9 @@ class BFCAllocator : public Allocator { // Pointer to head of linked list of free Chunks ChunkHandle free_chunks_list_ GUARDED_BY(lock_); + // Called once on each region, ASAP. + std::vector region_visitors_ GUARDED_BY(lock_); + // Counter containing the next unique identifier to assign to a // newly-created chunk. 
int64 next_allocation_id_ GUARDED_BY(lock_); diff --git a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h index 6bd29ef775..636cd43575 100644 --- a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h +++ b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h @@ -26,12 +26,8 @@ namespace tensorflow { class CUDAHostAllocator : public SubAllocator { public: // Note: stream_exec cannot be null. - explicit CUDAHostAllocator(se::StreamExecutor* stream_exec, int numa_node, - const std::vector& alloc_visitors, - const std::vector& free_visitors) - : SubAllocator(alloc_visitors, free_visitors), - stream_exec_(stream_exec), - numa_node_(numa_node) { + explicit CUDAHostAllocator(se::StreamExecutor* stream_exec) + : stream_exec_(stream_exec) { CHECK(stream_exec_ != nullptr); } ~CUDAHostAllocator() override {} @@ -43,23 +39,19 @@ class CUDAHostAllocator : public SubAllocator { if (ptr == nullptr) { LOG(WARNING) << "could not allocate pinned host memory of size: " << num_bytes; - return ptr; } - VisitAlloc(ptr, numa_node_, num_bytes); } return ptr; } void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { - VisitFree(ptr, numa_node_, num_bytes); stream_exec_->HostMemoryDeallocate(ptr); } } private: se::StreamExecutor* stream_exec_; // not owned, non-null - const int numa_node_; TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc index 44ffce77a1..2d4c8d0201 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc @@ -22,15 +22,18 @@ limitations under the License. 
namespace tensorflow { -GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator, - size_t total_memory, const string& name) - : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {} +GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + const string& name) + : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {} -GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator, - size_t total_memory, +GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, const GPUOptions& gpu_options, const string& name) - : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(), - name) {} + : BFCAllocator( + new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + gpu_options.per_process_gpu_memory_fraction() > 1.0 || + gpu_options.experimental().use_unified_memory()), + total_memory, gpu_options.allow_growth(), name) {} } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index 6b6de80734..f1cc2eace1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -31,20 +31,28 @@ limitations under the License. namespace tensorflow { -// Suballocator for GPU memory. -class GPUMemAllocator : public SubAllocator { +// A GPU memory allocator that implements a 'best-fit with coalescing' +// algorithm. +class GPUBFCAllocator : public BFCAllocator { public: // 'cuda_gpu_id' refers to the ID of the GPU device within // the process and must reference a valid ID in the process. + GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + const string& name); + GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + const GPUOptions& gpu_options, const string& name); + virtual ~GPUBFCAllocator() {} + + TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); +}; + +// Suballocator for GPU memory. 
+class GPUMemAllocator : public SubAllocator { + public: // Note: stream_exec cannot be null. - explicit GPUMemAllocator(se::StreamExecutor* stream_exec, CudaGpuId gpu_id, - bool use_unified_memory, - const std::vector& alloc_visitors, - const std::vector& free_visitors) - : SubAllocator(alloc_visitors, free_visitors), - stream_exec_(stream_exec), - gpu_id_(gpu_id), - use_unified_memory_(use_unified_memory) { + explicit GPUMemAllocator(se::StreamExecutor* stream_exec, + bool use_unified_memory) + : stream_exec_(stream_exec), use_unified_memory_(use_unified_memory) { CHECK(stream_exec_ != nullptr); } ~GPUMemAllocator() override {} @@ -57,14 +65,12 @@ class GPUMemAllocator : public SubAllocator { } else { ptr = stream_exec_->AllocateArray(num_bytes).opaque(); } - VisitAlloc(ptr, gpu_id_.value(), num_bytes); } return ptr; } void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { - VisitFree(ptr, gpu_id_.value(), num_bytes); if (use_unified_memory_) { stream_exec_->UnifiedMemoryDeallocate(ptr); } else { @@ -76,25 +82,11 @@ class GPUMemAllocator : public SubAllocator { private: se::StreamExecutor* stream_exec_; // not owned, non-null - const CudaGpuId gpu_id_; const bool use_unified_memory_ = false; TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator); }; -// A GPU memory allocator that implements a 'best-fit with coalescing' -// algorithm. 
-class GPUBFCAllocator : public BFCAllocator { - public: - GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory, - const string& name); - GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory, - const GPUOptions& gpu_options, const string& name); - ~GPUBFCAllocator() override {} - - TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); -}; - } // namespace tensorflow #endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_BFC_ALLOCATOR_H_ diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 7112c3afd4..67caeb3495 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -21,7 +21,6 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -47,11 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use, } TEST(GPUBFCAllocatorTest, NoDups) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); // Allocate a lot of raw pointers @@ -80,11 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) { } TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator 
a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); // Allocate 256 raw pointers of sizes between 100 bytes and about // a meg random::PhiloxRandom philox(123, 17); @@ -142,11 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { } TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); float* first_ptr = a.Allocate(1024); @@ -181,30 +168,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { } TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); float* ptr = a.Allocate(0); EXPECT_EQ(nullptr, ptr); } TEST(GPUBFCAllocatorTest, TracksSizes) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, 
a.AllocatedSize(t1)); @@ -212,12 +187,8 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { } TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); // Configure a 1MiB byte limit - GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc"); float* first_ptr = a.Allocate(1 << 6); float* second_ptr = a.Allocate(1 << 20); @@ -232,11 +203,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { options.set_allow_growth(true); // Max of 2GiB, but starts out small. - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1LL << 31, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc"); // Allocate 10 raw pointers of sizes between 100 bytes and about // 64 megs. 
@@ -297,15 +264,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { } TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1UL << 60, "GPU_0_bfc"); - sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator b(sub_allocator, 1UL << 60, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); + GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); void* amem = a.AllocateRaw(1, 1); void* bmem = b.AllocateRaw(1, 1 << 30); a.DeallocateRaw(amem); @@ -313,11 +273,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { } static void BM_Allocation(int iters) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 524288, 512, 1048576, 10485760, 104857600, @@ -333,11 +289,7 @@ static void BM_Allocation(int iters) { BENCHMARK(BM_Allocation); static void BM_AllocationThreaded(int iters, int num_threads) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); thread::ThreadPool pool(Env::Default(), "test", num_threads); std::atomic_int_fast32_t count(iters); mutex 
done_lock; @@ -373,11 +325,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16); // A more complex benchmark that defers deallocation of an object for // "delay" allocations. static void BM_AllocationDelayed(int iters, int delay) { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 4096, 512, 1024, 1024}; int size_index = 0; @@ -415,11 +363,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { // only methods inside this class can access private members of BFCAllocator. void TestBinDebugInfo() { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); std::vector initial_ptrs; std::vector initial_ptrs_allocated_sizes; @@ -497,11 +441,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { } void TestLog2FloorNonZeroSlow() { - CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUBFCAllocator a(sub_allocator, 1 /* total_memory */, "GPU_0_bfc"); + GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0)); EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1)); EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc index 
8e14f1ea75..934a57a5fb 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc @@ -27,7 +27,7 @@ limitations under the License. namespace tensorflow { -GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator, +GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -60,6 +60,14 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) { #endif // GOOGLE_CUDA } +void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) { + return base_allocator_->AddAllocVisitor(visitor); +} + +void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) { + return base_allocator_->AddFreeVisitor(visitor); +} + bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 3d1d0ef481..856fdc34b4 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -19,7 +19,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -29,17 +29,20 @@ namespace tensorflow { // An allocator that wraps a GPU allocator and adds debugging // functionality that verifies that users do not write outside their // allocated memory. 
-class GPUcudaMallocAllocator : public Allocator { +class GPUcudaMallocAllocator : public VisitableAllocator { public: - explicit GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); + explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, + CudaGpuId cuda_gpu_id); ~GPUcudaMallocAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; + void AddAllocVisitor(Visitor visitor) override; + void AddFreeVisitor(Visitor visitor) override; bool TracksAllocationSizes() override; private: - Allocator* base_allocator_ = nullptr; // owned + VisitableAllocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index 6bad66dcec..e4c834b30d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -73,7 +73,7 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { // ----------------------------------------------------------------------------- // GPUDebugAllocator // ----------------------------------------------------------------------------- -GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator, +GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -111,6 +111,14 @@ void GPUDebugAllocator::DeallocateRaw(void* ptr) { base_allocator_->DeallocateRaw(ptr); } +void GPUDebugAllocator::AddAllocVisitor(Visitor visitor) { + return base_allocator_->AddAllocVisitor(visitor); +} + +void GPUDebugAllocator::AddFreeVisitor(Visitor visitor) { + return base_allocator_->AddFreeVisitor(visitor); +} + bool GPUDebugAllocator::TracksAllocationSizes() { return 
true; } size_t GPUDebugAllocator::RequestedSize(const void* ptr) { @@ -150,7 +158,7 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) { // ----------------------------------------------------------------------------- // GPUNanResetAllocator // ----------------------------------------------------------------------------- -GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator, +GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -192,6 +200,14 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) { base_allocator_->DeallocateRaw(ptr); } +void GPUNanResetAllocator::AddAllocVisitor(Visitor visitor) { + return base_allocator_->AddAllocVisitor(visitor); +} + +void GPUNanResetAllocator::AddFreeVisitor(Visitor visitor) { + return base_allocator_->AddFreeVisitor(visitor); +} + size_t GPUNanResetAllocator::RequestedSize(const void* ptr) { return base_allocator_->RequestedSize(ptr); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index 0f27ff4384..0f9b72040c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -21,7 +21,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -31,13 +31,16 @@ namespace tensorflow { // An allocator that wraps a GPU allocator and adds debugging // functionality that verifies that users do not write outside their // allocated memory. 
-class GPUDebugAllocator : public Allocator { +class GPUDebugAllocator : public VisitableAllocator { public: - explicit GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); + explicit GPUDebugAllocator(VisitableAllocator* allocator, + CudaGpuId cuda_gpu_id); ~GPUDebugAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; + void AddAllocVisitor(Visitor visitor) override; + void AddFreeVisitor(Visitor visitor) override; bool TracksAllocationSizes() override; size_t RequestedSize(const void* ptr) override; size_t AllocatedSize(const void* ptr) override; @@ -50,7 +53,7 @@ class GPUDebugAllocator : public Allocator { bool CheckFooter(void* ptr); private: - Allocator* base_allocator_ = nullptr; // owned + VisitableAllocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. @@ -60,20 +63,23 @@ class GPUDebugAllocator : public Allocator { // An allocator that wraps a GPU allocator and resets the memory on // allocation and free to 'NaN', helping to identify cases where the // user forgets to initialize the memory. 
-class GPUNanResetAllocator : public Allocator { +class GPUNanResetAllocator : public VisitableAllocator { public: - explicit GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); + explicit GPUNanResetAllocator(VisitableAllocator* allocator, + CudaGpuId cuda_gpu_id); ~GPUNanResetAllocator() override; string Name() override { return "gpu_nan_reset"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; + void AddAllocVisitor(Visitor visitor) override; + void AddFreeVisitor(Visitor visitor) override; size_t RequestedSize(const void* ptr) override; size_t AllocatedSize(const void* ptr) override; void GetStats(AllocatorStats* stats) override; void ClearStats() override; private: - Allocator* base_allocator_ = nullptr; // owned + VisitableAllocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index 98283cd846..236a0afa0b 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -35,10 +35,7 @@ namespace { TEST(GPUDebugAllocatorTest, OverwriteDetection_None) { const CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -62,10 +59,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) { EXPECT_DEATH( { const CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), - cuda_gpu_id, 
false /*use_unified_memory*/, {}, {}); - GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -98,10 +92,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { EXPECT_DEATH( { const CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), - cuda_gpu_id, false /*use_unified_memory*/, {}, {}); - GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -131,10 +122,7 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { TEST(GPUDebugAllocatorTest, ResetToNan) { const CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -175,11 +163,8 @@ TEST(GPUDebugAllocatorTest, ResetToNan) { TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { const CudaGpuId cuda_gpu_id(0); // NaN reset must be the outer-most allocator. 
- GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -220,21 +205,15 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { TEST(GPUDebugAllocatorTest, TracksSizes) { const CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); - GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUDebugAllocatorTest, AllocatedVsRequested) { const CudaGpuId cuda_gpu_id(0); - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - false /*use_unified_memory*/, {}, {}); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), + new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), cuda_gpu_id), cuda_gpu_id); float* t1 = a.Allocate(1); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 50e61b7e00..2763ac0d4a 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -41,6 +41,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/common_runtime/local_device.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" @@ -284,38 +285,6 @@ BaseGPUDevice::~BaseGPUDevice() { for (auto ctx : device_contexts_) ctx->Unref(); } -// This should be idempotent if already initialized. -Status BaseGPUDevice::InitScratchBuffers() { - mutex_lock l(scratch_init_mutex_); - if (scratch_.size() < max_streams_) { - for (int i = 0; i < max_streams_; i++) { - DCHECK(streams_[i]); - if (scratch_.size() > i && scratch_[i]) continue; - size_t scratch_buffer_size = - Eigen::kCudaScratchSize + sizeof(unsigned int); - void* scratch_buffer = gpu_allocator_->AllocateRaw( - Allocator::kAllocatorAlignment, scratch_buffer_size); - if (scratch_buffer == nullptr) { - return errors::FailedPrecondition( - "Failed to allocate scratch buffer for device ", - tf_gpu_id_.value()); - } - se::DeviceMemory mem( - se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); - - bool ok = executor_->SynchronousMemZero( - &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); - if (!ok) { - return errors::FailedPrecondition( - "Failed to memcopy into scratch buffer for device ", - tf_gpu_id_.value()); - } - scratch_.push_back(static_cast(scratch_buffer)); - } - } - return Status::OK(); -} - Status BaseGPUDevice::Init(const SessionOptions& options) { auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_); if (!executor_status.status().ok()) { @@ -334,6 +303,27 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { for (int i = 0; i < max_streams_; i++) { streams_.push_back(StreamGroupFactory::Global().GetOrCreate( tf_gpu_id_, i, executor_, options.config.gpu_options())); + + size_t scratch_buffer_size = 
Eigen::kCudaScratchSize + sizeof(unsigned int); + void* scratch_buffer = gpu_allocator_->AllocateRaw( + Allocator::kAllocatorAlignment, scratch_buffer_size); + if (scratch_buffer == nullptr) { + return errors::FailedPrecondition( + "Failed to allocate scratch buffer for device ", tf_gpu_id_.value()); + } + scratch_.push_back(static_cast(scratch_buffer)); + + se::DeviceMemory mem( + se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); + + bool ok = executor_->SynchronousMemZero( + &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); + if (!ok) { + return errors::FailedPrecondition( + "Failed to memcopy into scratch buffer for device ", + tf_gpu_id_.value()); + } + device_contexts_.push_back(new GPUDeviceContext( i, streams_.back()->compute, streams_.back()->host_to_device, streams_.back()->device_to_host, streams_.back()->device_to_device)); @@ -877,11 +867,10 @@ PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() { return new ConcretePerOpGpuDevice(); } -Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, - PerOpGpuDevice* device, - DeviceContext* dc, - Allocator* allocator) { - TF_RETURN_IF_ERROR(InitScratchBuffers()); +void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, + PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) { if (dc) { const GPUDeviceContext* gpu_dc = static_cast(dc); const int stream_id = gpu_dc->stream_id(); @@ -892,7 +881,6 @@ Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, } else { ReinitializeDevice(context, device, 0, allocator); } - return Status::OK(); } Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr, diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index b3eea55758..56d03d7a8c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -86,9 +86,8 @@ class BaseGPUDevice : public LocalDevice { // The caller owns the 
returned device. PerOpGpuDevice* MakeGpuDevice() override; - Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, - Allocator* allocator) override; + void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, Allocator* allocator) override; // Returns the CUDA GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. @@ -126,7 +125,6 @@ class BaseGPUDevice : public LocalDevice { class StreamGroupFactory; gtl::InlinedVector streams_; - mutex scratch_init_mutex_; gtl::InlinedVector scratch_; std::vector device_contexts_; GpuDeviceInfo* gpu_device_info_ = nullptr; @@ -137,9 +135,6 @@ class BaseGPUDevice : public LocalDevice { std::unique_ptr em_; std::unique_ptr thread_pool_; - // Initialize scractch buffers used by Eigen. - Status InitScratchBuffers(); - void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index 9ec740fabe..b18688174d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -76,16 +76,12 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) { // This function is defined for debugging problems with the allocators. GPUProcessState::~GPUProcessState() { CHECK_EQ(this, instance_); + for (auto p : gpu_allocators_) { + delete p; + } instance_ = nullptr; } -int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) { - // Return the NUMA node associated with the GPU's StreamExecutor. 
- se::StreamExecutor* se = - GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie(); - return se->GetDeviceDescription().numa_node(); -} - Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, TfGpuId tf_gpu_id, size_t total_bytes) { @@ -97,10 +93,13 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, if (tf_gpu_id.value() >= static_cast(gpu_allocators_.size())) { gpu_allocators_.resize(tf_gpu_id.value() + 1); + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) + gpu_al_.resize(tf_gpu_id.value() + 1); } - AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()]; - if (allocator_parts.allocator.get() == nullptr) { + if (gpu_allocators_[tf_gpu_id.value()] == nullptr) { + VisitableAllocator* gpu_allocator; + // Validate allocator types. if (!allocator_type.empty() && allocator_type != "BFC") { LOG(ERROR) << "Invalid allocator type: " << allocator_type; @@ -109,17 +108,8 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, CudaGpuId cuda_gpu_id; TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - int bus_id = BusIdForGPU(tf_gpu_id); - while (bus_id >= gpu_visitors_.size()) { - gpu_visitors_.push_back({}); - } - GPUMemAllocator* sub_allocator = new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, - (options.per_process_gpu_memory_fraction() > 1.0 || - options.experimental().use_unified_memory()), - gpu_visitors_[bus_id], {}); - Allocator* gpu_allocator = - new GPUBFCAllocator(sub_allocator, total_bytes, options, + gpu_allocator = + new GPUBFCAllocator(cuda_gpu_id, total_bytes, options, strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc")); // If true, checks for memory overwrites by writing @@ -133,25 +123,34 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, // **WARNING** probably will not work in a multi-gpu scenario gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id); } - - 
Allocator* recording_allocator = nullptr; + gpu_allocators_[tf_gpu_id.value()] = gpu_allocator; + + // If there are any pending AllocVisitors for this bus, add + // them now. + se::StreamExecutor* se = + GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie(); + int bus_id = se->GetDeviceDescription().numa_node(); + if (bus_id >= 0 && bus_id < static_cast(gpu_visitors_.size())) { + for (const auto& v : gpu_visitors_[bus_id]) { + gpu_allocator->AddAllocVisitor(v); + } + } if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::GPU; md.dev_index = cuda_gpu_id.value(); md.gpu_registered = false; md.nic_registered = true; - recording_allocator = new internal::RecordingAllocator( + if (static_cast(gpu_al_.size()) <= tf_gpu_id.value()) { + gpu_al_.resize(tf_gpu_id.value() + 1); + } + gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator( &process_state_->mem_desc_map_, gpu_allocator, md, &mu_); } - allocator_parts = {std::unique_ptr(gpu_allocator), sub_allocator, - std::unique_ptr(recording_allocator)}; - } - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { - return allocator_parts.recording_allocator.get(); - } else { - return allocator_parts.allocator.get(); } + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) + return gpu_al_[tf_gpu_id.value()]; + return gpu_allocators_[tf_gpu_id.value()]; #else LOG(FATAL) << "GPUAllocator unavailable. 
Not compiled with --config=cuda."; return nullptr; @@ -173,12 +172,11 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { tf_shared_lock lock(mu_); if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types && - !cuda_host_allocators_.empty() && - cuda_host_allocators_[0].recording_allocator != nullptr) { - return cuda_host_allocators_[0].recording_allocator.get(); + static_cast(cuda_al_.size()) > 0) { + return cuda_al_[0]; } if (static_cast(cuda_host_allocators_.size()) > numa_node) { - return cuda_host_allocators_[0].allocator.get(); + return cuda_host_allocators_[0]; } } @@ -192,7 +190,7 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { // it knows is valid. se::StreamExecutor* se = nullptr; for (int i = 0; i < static_cast(gpu_allocators_.size()); ++i) { - if (gpu_allocators_[i].allocator != nullptr) { + if (gpu_allocators_[i] != nullptr) { se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie(); break; } @@ -201,15 +199,6 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { CHECK_NE(nullptr, se); while (static_cast(cuda_host_allocators_.size()) <= numa_node) { - while (cuda_host_alloc_visitors_.size() <= numa_node) { - cuda_host_alloc_visitors_.push_back({}); - } - while (cuda_host_free_visitors_.size() <= numa_node) { - cuda_host_free_visitors_.push_back({}); - } - SubAllocator* sub_allocator = new CUDAHostAllocator( - se, numa_node, cuda_host_alloc_visitors_[numa_node], - cuda_host_free_visitors_[numa_node]); // TODO(zheng-xq): evaluate whether 64GB by default is the best choice. 
int64 cuda_host_mem_limit_in_mb = -1; Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB", @@ -219,92 +208,62 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message(); } int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20); - Allocator* allocator = - new BFCAllocator(sub_allocator, cuda_host_mem_limit, + VisitableAllocator* allocator = + new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit, true /*allow_growth*/, "cuda_host_bfc" /*name*/); - if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) { + if (LogMemory::IsEnabled()) { // Wrap the allocator to track allocation ids for better logging // at the cost of performance. - allocator = new TrackingAllocator(allocator, true); + allocator = new TrackingVisitableAllocator(allocator, true); } - cuda_host_allocators_.push_back({std::unique_ptr(allocator), - sub_allocator, - std::unique_ptr(nullptr)}); - AllocatorParts& allocator_parts = cuda_host_allocators_.back(); + cuda_host_allocators_.push_back(allocator); if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::CPU; md.dev_index = 0; md.gpu_registered = true; md.nic_registered = false; - allocator_parts.recording_allocator.reset( - new internal::RecordingAllocator(&process_state_->mem_desc_map_, - allocator_parts.allocator.get(), md, - &mu_)); + cuda_al_.push_back(new internal::RecordingAllocator( + &process_state_->mem_desc_map_, cuda_host_allocators_.back(), md, + &mu_)); } } - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { - return cuda_host_allocators_[0].recording_allocator.get(); - } else { - return cuda_host_allocators_[0].allocator.get(); - } + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) + return cuda_al_[0]; + return cuda_host_allocators_[0]; } void GPUProcessState::AddGPUAllocVisitor(int bus_id, - const 
SubAllocator::Visitor& visitor) { + const AllocVisitor& visitor) { + CHECK(process_state_); #if GOOGLE_CUDA mutex_lock lock(mu_); - CHECK(gpu_allocators_.empty()) // Crash OK - << "AddGPUAllocVisitor must be called before " - "first call to GetGPUAllocator."; + for (int i = 0; i < static_cast(gpu_allocators_.size()); ++i) { + se::StreamExecutor* se = + GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie(); + if (gpu_allocators_[i] && + (se->GetDeviceDescription().numa_node() + 1) == bus_id) { + gpu_allocators_[i]->AddAllocVisitor(visitor); + } + } while (bus_id >= static_cast(gpu_visitors_.size())) { - gpu_visitors_.push_back(std::vector()); + gpu_visitors_.push_back(std::vector()); } gpu_visitors_[bus_id].push_back(visitor); #endif // GOOGLE_CUDA } -void GPUProcessState::AddCUDAHostAllocVisitor( - int numa_node, const SubAllocator::Visitor& visitor) { -#if GOOGLE_CUDA - mutex_lock lock(mu_); - CHECK(cuda_host_allocators_.empty()) // Crash OK - << "AddCUDAHostAllocVisitor must be called before " - "first call to GetCUDAHostAllocator."; - while (numa_node >= static_cast(cuda_host_alloc_visitors_.size())) { - cuda_host_alloc_visitors_.push_back(std::vector()); - } - cuda_host_alloc_visitors_[numa_node].push_back(visitor); -#endif // GOOGLE_CUDA -} - -void GPUProcessState::AddCUDAHostFreeVisitor( - int numa_node, const SubAllocator::Visitor& visitor) { -#if GOOGLE_CUDA - mutex_lock lock(mu_); - CHECK(cuda_host_allocators_.empty()) // Crash OK - << "AddCUDAHostFreeVisitor must be called before " - "first call to GetCUDAHostAllocator."; - while (numa_node >= static_cast(cuda_host_free_visitors_.size())) { - cuda_host_free_visitors_.push_back(std::vector()); - } - cuda_host_free_visitors_[numa_node].push_back(visitor); -#endif // GOOGLE_CUDA -} - void GPUProcessState::TestOnlyReset() { - if (process_state_) { - process_state_->ProcessState::TestOnlyReset(); - } + process_state_->ProcessState::TestOnlyReset(); { mutex_lock lock(mu_); gpu_device_enabled_ = false; - 
gpu_allocators_.clear(); gpu_visitors_.clear(); - cuda_host_allocators_.clear(); - cuda_host_alloc_visitors_.clear(); - cuda_host_free_visitors_.clear(); + gtl::STLDeleteElements(&gpu_allocators_); + gtl::STLDeleteElements(&cuda_host_allocators_); + gtl::STLDeleteElements(&gpu_al_); + gtl::STLDeleteElements(&cuda_al_); } } diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h index 43e9a31660..cb41c3c6bd 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h @@ -32,6 +32,7 @@ limitations under the License. namespace tensorflow { class Allocator; +class VisitableAllocator; class PoolAllocator; // Singleton that manages per-process state when GPUs are present. @@ -71,30 +72,18 @@ class GPUProcessState { virtual Allocator* GetCUDAHostAllocator(int numa_node); - // Registers a Visitor to be invoked on new chunks of memory allocated by the - // SubAllocator of every GPU proximate to the specified bus. The AllocVisitor - // is provided with a memory pointer, a GPU id, and the size of the area it - // identifies. The pointer is not guaranteed to be valid after the call - // terminates. The intention is for this interface to be used for network - // device memory registration. "bus_id" is platform-specific. On many - // platforms it should be 0. On machines with multiple PCIe buses, it should - // be the index of one of the PCIe buses (maybe the NUMA node at which the - // PCIe is rooted). If the bus_id is invalid, results are undefined. - virtual void AddGPUAllocVisitor(int bus_id, - const SubAllocator::Visitor& visitor); - - // Registers a Visitor to be invoked on new chunks of memory allocated by - // the SubAllocator of the CUDAHostAllocator for the given numa_node. 
- virtual void AddCUDAHostAllocVisitor(int numa_node, - const SubAllocator::Visitor& visitor); - - // Registers a Visitor to be invoked on each chunk handed back for freeing to - // the SubAllocator of the CUDAHostAllocator for the given numa_node. - virtual void AddCUDAHostFreeVisitor(int numa_node, - const SubAllocator::Visitor& visitor); - - // Returns bus_id for the given GPU id. - virtual int BusIdForGPU(TfGpuId tf_gpu_id); + // Registers a function to be called once on every new Region + // allocated by every GPURegionAllocator proximate to the specified + // bus. The AllocVisitor is provided with a memory pointer and the + // size of the area it identifies. The pointer is not guaranteed to + // be valid after the call terminates. The intention is for this + // interface to be used for network device memory registration. + // "bus_id" is platform-specific. On many platforms it + // should be 0. On machines with multiple PCIe buses, it should be + // the index of one of the PCIe buses. If the bus_id is invalid, + // results are undefined. + typedef std::function AllocVisitor; + virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor); protected: GPUProcessState(); @@ -114,22 +103,17 @@ class GPUProcessState { mutex mu_; - struct AllocatorParts { - std::unique_ptr allocator; - SubAllocator* sub_allocator; // owned by allocator - std::unique_ptr recording_allocator; - }; - std::vector gpu_allocators_ GUARDED_BY(mu_); - std::vector> gpu_visitors_ GUARDED_BY(mu_); - - std::vector cuda_host_allocators_ GUARDED_BY(mu_); - std::vector> cuda_host_alloc_visitors_ - GUARDED_BY(mu_); - std::vector> cuda_host_free_visitors_ - GUARDED_BY(mu_); + std::vector gpu_allocators_ GUARDED_BY(mu_); + std::vector> gpu_visitors_ GUARDED_BY(mu_); + std::vector cuda_host_allocators_ GUARDED_BY(mu_); virtual ~GPUProcessState(); + // Optional RecordingAllocators that wrap the corresponding + // Allocators for runtime attribute use analysis. 
+ std::vector gpu_al_ GUARDED_BY(mu_); + std::vector cuda_al_ GUARDED_BY(mu_); + friend class GPUDeviceTest; }; diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc index 6b2f6547b0..583bff2c07 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc @@ -31,8 +31,7 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) { 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie(), - 0 /*numa_node*/, {}, {}), + .ValueOrDie()), new NoopRounder, "pool"); EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/)); @@ -50,8 +49,7 @@ TEST(PoolAllocatorTest, ZeroSizePool) { 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie(), - 0 /*numa_node*/, {}, {}), + .ValueOrDie()), new NoopRounder, "pool"); EXPECT_EQ(0, pool.get_from_pool_count()); @@ -84,8 +82,7 @@ TEST(PoolAllocatorTest, Alignment) { 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie(), - 0 /*numa_node*/, {}, {}), + .ValueOrDie()), new NoopRounder, "pool"); for (int i = 0; i < 16; ++i) { size_t alignment = 1 << i; @@ -100,8 +97,8 @@ TEST(PoolAllocatorTest, Alignment) { TEST(PoolAllocatorTest, AutoResize) { PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/, - new BasicCPUAllocator(0 /*numa_node*/, {}, {}), - new NoopRounder, "pool"); + new BasicCPUAllocator(0 /*numa_node*/), new NoopRounder, + "pool"); // Alloc/dealloc 10 sizes just a few times, confirming pool size // stays at 2. 
@@ -126,32 +123,14 @@ TEST(PoolAllocatorTest, AutoResize) { } TEST(PoolAllocatorTest, CudaHostAllocator) { - int alloc_count = 0; - int64 alloc_size = 0; - SubAllocator::Visitor alloc_visitor = - [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) { - ++alloc_count; - alloc_size += size; - }; - int free_count = 0; - int64 free_size = 0; - SubAllocator::Visitor free_visitor = - [&free_count, &free_size](void* ptr, int numa_node, int64 size) { - ++free_count; - free_size += size; - }; se::Platform* platform = se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); - CUDAHostAllocator* sub_allocator = new CUDAHostAllocator( - platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie(), - 0 /*numa_node*/, {alloc_visitor}, {free_visitor}); - PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/, - sub_allocator, new NoopRounder, "pool"); - EXPECT_EQ(0, alloc_count); - EXPECT_EQ(0, alloc_size); - EXPECT_EQ(0, free_count); - EXPECT_EQ(0, free_size); + PoolAllocator pool( + 2 /*pool_size_limit*/, false /*auto_resize*/, + new CUDAHostAllocator( + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) + .ValueOrDie()), + new NoopRounder, "pool"); // Repeatedly Get a 16-byte value, confirming that there's only // one real allocation. @@ -159,10 +138,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { EXPECT_EQ(0, pool.get_from_pool_count()); EXPECT_EQ(1, pool.allocated_count()); EXPECT_NE(nullptr, p1_16); - EXPECT_EQ(1, alloc_count); // Underlying suballoc of 16 bytes - // Each suballocation includes a 16B ChunkPrefix. - static const int kChunkPrefixSize = 16; - EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size); pool.DeallocateRaw(p1_16); // Pool contents {16} EXPECT_EQ(1, pool.put_count()); @@ -173,9 +148,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { pool.DeallocateRaw(p2_16); // Put it back. 
// Pool contents {16} EXPECT_EQ(2, pool.put_count()); - EXPECT_EQ(1, alloc_count); // Underlying suballoc of 16 bytes - EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size); - EXPECT_EQ(0, free_count); // Get two more values of different sizes. void* p3_4 = pool.AllocateRaw(4, 4); @@ -188,9 +160,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { void* p4_2 = pool.AllocateRaw(4, 2); // Get a third size buffer. EXPECT_NE(nullptr, p4_2); EXPECT_EQ(0, pool.evicted_count()); - EXPECT_EQ(3, alloc_count); - EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); - EXPECT_EQ(0, free_count); // The pool is full: when we put back p4_2, the 16-byte buffer // should be evicted since it was least recently inserted. @@ -198,10 +167,6 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { // Pool contents {2, 4} EXPECT_EQ(4, pool.put_count()); EXPECT_EQ(1, pool.evicted_count()); - EXPECT_EQ(3, alloc_count); - EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); - EXPECT_EQ(1, free_count); - EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size); // Re-getting and putting size 2 or 4 should not alter pool size or // num-evicted. 
@@ -215,20 +180,12 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { EXPECT_EQ(6, pool.put_count()); EXPECT_EQ(3, pool.allocated_count()); EXPECT_EQ(1, pool.evicted_count()); - EXPECT_EQ(3, alloc_count); - EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); - EXPECT_EQ(1, free_count); - EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size); pool.Clear(); EXPECT_EQ(0, pool.get_from_pool_count()); EXPECT_EQ(0, pool.put_count()); EXPECT_EQ(0, pool.allocated_count()); EXPECT_EQ(0, pool.evicted_count()); - EXPECT_EQ(3, alloc_count); - EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); - EXPECT_EQ(3, free_count); - EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size); } TEST(PoolAllocatorTest, Pow2Rounder) { @@ -249,8 +206,7 @@ TEST(PoolAllocatorTest, Name) { 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie(), - 0 /*numa_node*/, {}, {}), + .ValueOrDie()), new NoopRounder, "pool"); EXPECT_EQ("pool", pool.Name()); } diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 538a70668a..df9c3a686c 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -23,11 +23,12 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/common_runtime/pool_allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" -#include "tensorflow/core/platform/numa.h" +#include "tensorflow/core/platform/mutex.h" #ifndef INTEL_MKL_DNN_ONLY #include "i_malloc.h" @@ -39,16 +40,20 @@ typedef unsigned int uint; namespace tensorflow { -class MklSubAllocator : public BasicCPUAllocator { +class MklSubAllocator : public SubAllocator { public: - MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {} ~MklSubAllocator() override {} + + void* Alloc(size_t alignment, size_t num_bytes) override { + return port::AlignedMalloc(num_bytes, alignment); + } + void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); } }; // CPU allocator that handles small-size allocations by calling // suballocator directly. Mostly, it is just a wrapper around a suballocator // (that calls malloc and free directly) with support for bookkeeping. -class MklSmallSizeAllocator : public Allocator { +class MklSmallSizeAllocator : public VisitableAllocator { public: MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory, const string& name) @@ -70,6 +75,10 @@ class MklSmallSizeAllocator : public Allocator { CHECK(map_.insert(map_val).second); // Increment statistics for small-size allocations. IncrementStats(num_bytes); + // Call alloc visitors. + for (const auto& visitor : alloc_visitors_) { + visitor(ptr, num_bytes); + } } return ptr; } @@ -85,6 +94,9 @@ class MklSmallSizeAllocator : public Allocator { if (map_iter != map_.end()) { // Call free visitors. 
size_t dealloc_bytes = map_iter->second; + for (const auto& visitor : free_visitors_) { + visitor(ptr, dealloc_bytes); + } sub_allocator_->Free(ptr, dealloc_bytes); DecrementStats(dealloc_bytes); map_.erase(map_iter); @@ -109,6 +121,16 @@ class MklSmallSizeAllocator : public Allocator { stats_.Clear(); } + void AddAllocVisitor(Visitor visitor) override { + mutex_lock l(mutex_); + alloc_visitors_.push_back(visitor); + } + + void AddFreeVisitor(Visitor visitor) override { + mutex_lock l(mutex_); + free_visitors_.push_back(visitor); + } + private: // Increment statistics for the allocator handling small allocations. inline void IncrementStats(size_t alloc_size) @@ -141,11 +163,15 @@ class MklSmallSizeAllocator : public Allocator { // Allocator stats for small allocs AllocatorStats stats_ GUARDED_BY(mutex_); + + // Visitors + std::vector alloc_visitors_ GUARDED_BY(mutex_); + std::vector free_visitors_ GUARDED_BY(mutex_); }; /// CPU allocator for MKL that wraps BFC allocator and intercepts /// and redirects memory allocation calls from MKL. 
-class MklCPUAllocator : public Allocator { +class MklCPUAllocator : public VisitableAllocator { public: // Constructor and other standard functions @@ -258,6 +284,16 @@ class MklCPUAllocator : public Allocator { large_size_allocator_->ClearStats(); } + void AddAllocVisitor(Visitor visitor) override { + small_size_allocator_->AddAllocVisitor(visitor); + large_size_allocator_->AddAllocVisitor(visitor); + } + + void AddFreeVisitor(Visitor visitor) override { + small_size_allocator_->AddFreeVisitor(visitor); + large_size_allocator_->AddFreeVisitor(visitor); + } + private: // Hooks provided by this allocator for memory allocation routines from MKL @@ -294,7 +330,7 @@ class MklCPUAllocator : public Allocator { // The alignment that we need for the allocations static constexpr const size_t kAlignment = 64; - Allocator* large_size_allocator_; // owned by this class + VisitableAllocator* large_size_allocator_; // owned by this class MklSmallSizeAllocator* small_size_allocator_; // owned by this class. 
SubAllocator* sub_allocator_; // not owned by this class diff --git a/tensorflow/core/common_runtime/pool_allocator.cc b/tensorflow/core/common_runtime/pool_allocator.cc index 66dc8f3322..fdad8de8d6 100644 --- a/tensorflow/core/common_runtime/pool_allocator.cc +++ b/tensorflow/core/common_runtime/pool_allocator.cc @@ -40,7 +40,8 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize, auto_resize_(auto_resize), pool_size_limit_(pool_size_limit), allocator_(allocator), - size_rounder_(size_rounder) { + size_rounder_(size_rounder), + allocation_begun_(false) { if (auto_resize) { CHECK_LT(size_t{0}, pool_size_limit) << "size limit must be > 0 if auto_resize is true."; @@ -92,6 +93,7 @@ ChunkPrefix* FindPrefix(void* user_ptr) { } // namespace void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { + if (!allocation_begun_) allocation_begun_ = true; if (num_bytes == 0) return nullptr; // If alignment is larger than kPoolAlignment, increase num_bytes so that we @@ -127,6 +129,9 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { return PrepareChunk(r, alignment, num_bytes); } else { void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes); + for (const auto& v : alloc_visitors_) { + v(ptr, num_bytes); + } return PrepareChunk(ptr, alignment, num_bytes); } } @@ -136,6 +141,9 @@ void PoolAllocator::DeallocateRaw(void* ptr) { ChunkPrefix* cp = FindPrefix(ptr); CHECK_LE((void*)cp, (void*)ptr); if (!has_size_limit_ && !auto_resize_) { + for (const auto& v : free_visitors_) { + v(cp, cp->num_bytes); + } allocator_->Free(cp, cp->num_bytes); } else { mutex_lock lock(mutex_); @@ -156,6 +164,9 @@ void PoolAllocator::Clear() { mutex_lock lock(mutex_); for (auto iter : pool_) { PtrRecord* pr = iter.second; + for (const auto& v : free_visitors_) { + v(pr->ptr, pr->num_bytes); + } allocator_->Free(pr->ptr, pr->num_bytes); delete pr; } @@ -210,6 +221,9 @@ void PoolAllocator::EvictOne() { DCHECK(iter != pool_.end()); } 
pool_.erase(iter); + for (const auto& v : free_visitors_) { + v(prec->ptr, prec->num_bytes); + } allocator_->Free(prec->ptr, prec->num_bytes); delete prec; ++evicted_count_; @@ -255,19 +269,28 @@ void PoolAllocator::EvictOne() { } } +void PoolAllocator::AddAllocVisitor(Visitor visitor) { + mutex_lock lock(mutex_); + CHECK(!allocation_begun_) + << "AddAllocVisitor may not be called after pool allocation " + << "has begun."; + alloc_visitors_.push_back(visitor); +} + +void PoolAllocator::AddFreeVisitor(Visitor visitor) { + mutex_lock lock(mutex_); + CHECK(!allocation_begun_) + << "AddFreeVisitor may not be called after pool allocation " + << "has begun."; + free_visitors_.push_back(visitor); +} + void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) { - void* ptr = nullptr; - if (num_bytes > 0) { - ptr = port::AlignedMalloc(num_bytes, static_cast(alignment)); - VisitAlloc(ptr, numa_node_, num_bytes); - } - return ptr; + return port::AlignedMalloc(num_bytes, static_cast(alignment)); } void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) { - if (num_bytes > 0) { - VisitFree(ptr, numa_node_, num_bytes); - port::AlignedFree(ptr); - } + port::AlignedFree(ptr); } + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h index 5b4623ba10..607734445b 100644 --- a/tensorflow/core/common_runtime/pool_allocator.h +++ b/tensorflow/core/common_runtime/pool_allocator.h @@ -16,13 +16,14 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ -// Simple LRU pool allocators for various flavors of CPU RAM. +// Simple LRU pool allocators for various flavors of CPU RAM that +// implement the VisitableAllocator interface. 
#include #include #include #include -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -40,7 +41,7 @@ class RoundUpInterface { // Size-limited pool of memory buffers obtained from a SubAllocator // instance. Pool eviction policy is LRU. -class PoolAllocator : public Allocator { +class PoolAllocator : public VisitableAllocator { public: // "pool_size_limit" is the maximum number of returned, re-usable // memory buffers to keep in the pool. If pool_size_limit == 0, the @@ -63,6 +64,14 @@ class PoolAllocator : public Allocator { void DeallocateRaw(void* ptr) override; + // REQUIRES: The following functions may only be called prior + // to the first Allocate*() call. Once allocation has begun, it is + // illegal to register another visitor. + + void AddAllocVisitor(Visitor visitor) override; + + void AddFreeVisitor(Visitor visitor) override; + // Allocate an unused memory region of size "num_bytes". Fetch from // the pool if available, otherwise call allocator_. void* Get(size_t num_bytes); @@ -132,6 +141,12 @@ class PoolAllocator : public Allocator { int64 put_count_ GUARDED_BY(mutex_) = 0; int64 allocated_count_ GUARDED_BY(mutex_) = 0; int64 evicted_count_ GUARDED_BY(mutex_) = 0; + // Write access to these is guarded by mutex_, but not read + // access. They may only be modified prior to the first + // allocation. Later attempts to modify will fail. + std::vector alloc_visitors_; + std::vector free_visitors_; + std::atomic allocation_begun_; }; // Do-nothing rounder. Passes through sizes unchanged. @@ -151,9 +166,7 @@ class Pow2Rounder : public RoundUpInterface { class BasicCPUAllocator : public SubAllocator { public: // Argument numa_node is currently ignored. 
- BasicCPUAllocator(int numa_node, const std::vector& alloc_visitors, - const std::vector& free_visitors) - : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {} + explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {} ~BasicCPUAllocator() override {} @@ -163,8 +176,6 @@ class BasicCPUAllocator : public SubAllocator { private: int numa_node_; - - TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator); }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc index bcaa37fc8a..447338e7bd 100644 --- a/tensorflow/core/common_runtime/process_state.cc +++ b/tensorflow/core/common_runtime/process_state.cc @@ -71,28 +71,20 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) { return MemDesc(); } -Allocator* ProcessState::GetCPUAllocator(int numa_node) { +VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) { CHECK_GE(numa_node, 0); if (!numa_enabled_) numa_node = 0; mutex_lock lock(mu_); while (cpu_allocators_.size() <= static_cast(numa_node)) { - // If visitors have been defined we need an Allocator built from - // a SubAllocator. Prefer BFCAllocator, but fall back to PoolAllocator - // depending on env var setting. - const bool alloc_visitors_defined = - (!cpu_alloc_visitors_.empty() || !cpu_free_visitors_.empty()); bool use_bfc_allocator = false; - Status status = ReadBoolFromEnvVar( - "TF_CPU_ALLOCATOR_USE_BFC", alloc_visitors_defined, &use_bfc_allocator); + // TODO(reedwm): Switch default to BGFAllocator if it's at least as fast and + // efficient. + Status status = ReadBoolFromEnvVar("TF_CPU_ALLOCATOR_USE_BFC", false, + &use_bfc_allocator); if (!status.ok()) { LOG(ERROR) << "GetCPUAllocator: " << status.error_message(); } - Allocator* allocator = nullptr; - SubAllocator* sub_allocator = - (alloc_visitors_defined || use_bfc_allocator) - ? new BasicCPUAllocator(numa_enabled_ ? 
numa_node : -1, - cpu_alloc_visitors_, cpu_free_visitors_) - : nullptr; + VisitableAllocator* allocator; if (use_bfc_allocator) { // TODO(reedwm): evaluate whether 64GB by default is the best choice. int64 cpu_mem_limit_in_mb = -1; @@ -103,63 +95,34 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) { LOG(ERROR) << "GetCPUAllocator: " << status.error_message(); } int64 cpu_mem_limit = cpu_mem_limit_in_mb * (1LL << 20); - DCHECK(sub_allocator); - allocator = - new BFCAllocator(sub_allocator, cpu_mem_limit, true /*allow_growth*/, - "bfc_cpu_allocator_for_gpu" /*name*/); + allocator = new BFCAllocator( + new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), cpu_mem_limit, + true /*allow_growth*/, "bfc_cpu_allocator_for_gpu" /*name*/); VLOG(2) << "Using BFCAllocator with memory limit of " << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator"; - } else if (alloc_visitors_defined) { - DCHECK(sub_allocator); - allocator = - new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/, - sub_allocator, new NoopRounder, "cpu_pool"); + } else { + allocator = new PoolAllocator( + 100 /*pool_size_limit*/, true /*auto_resize*/, + new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), + new NoopRounder, "cpu_pool"); VLOG(2) << "Using PoolAllocator for ProcessState CPU allocator " << "numa_enabled_=" << numa_enabled_ << " numa_node=" << numa_node; - } else { - DCHECK(!sub_allocator); - allocator = cpu_allocator(); } - if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) { + if (LogMemory::IsEnabled()) { // Wrap the allocator to track allocation ids for better logging // at the cost of performance. 
- allocator = new TrackingAllocator(allocator, true); + allocator = new TrackingVisitableAllocator(allocator, true); } cpu_allocators_.push_back(allocator); - if (!sub_allocator) { - DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty()); - } } return cpu_allocators_[numa_node]; } -void ProcessState::AddCPUAllocVisitor(SubAllocator::Visitor visitor) { - VLOG(1) << "AddCPUAllocVisitor"; - mutex_lock lock(mu_); - CHECK_EQ(0, cpu_allocators_.size()) // Crash OK - << "AddCPUAllocVisitor must be called prior to first call to " - "ProcessState::GetCPUAllocator"; - cpu_alloc_visitors_.push_back(std::move(visitor)); -} - -void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) { - mutex_lock lock(mu_); - CHECK_EQ(0, cpu_allocators_.size()) // Crash OK - << "AddCPUFreeVisitor must be called prior to first call to " - "ProcessState::GetCPUAllocator"; - cpu_free_visitors_.push_back(std::move(visitor)); -} - void ProcessState::TestOnlyReset() { mutex_lock lock(mu_); - // Don't delete this value because it's static. - Allocator* default_cpu_allocator = cpu_allocator(); mem_desc_map_.clear(); - for (Allocator* a : cpu_allocators_) { - if (a != default_cpu_allocator) delete a; - } - cpu_allocators_.clear(); + gtl::STLDeleteElements(&cpu_allocators_); gtl::STLDeleteElements(&cpu_al_); } diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h index cac312d849..2892677333 100644 --- a/tensorflow/core/common_runtime/process_state.h +++ b/tensorflow/core/common_runtime/process_state.h @@ -30,6 +30,7 @@ limitations under the License. namespace tensorflow { class Allocator; +class VisitableAllocator; class PoolAllocator; // Singleton that manages per-process state, e.g. allocation of @@ -64,15 +65,7 @@ class ProcessState { // Returns the one CPUAllocator used for the given numa_node. // TEMPORARY: ignores numa_node. 
- Allocator* GetCPUAllocator(int numa_node); - - // Registers alloc visitor for the CPU allocator(s). - // REQUIRES: must be called before GetCPUAllocator. - void AddCPUAllocVisitor(SubAllocator::Visitor v); - - // Registers free visitor for the CPU allocator(s). - // REQUIRES: must be called before GetCPUAllocator. - void AddCPUFreeVisitor(SubAllocator::Visitor v); + VisitableAllocator* GetCPUAllocator(int numa_node); typedef std::unordered_map MDMap; @@ -94,9 +87,7 @@ class ProcessState { mutex mu_; - std::vector cpu_allocators_ GUARDED_BY(mu_); - std::vector cpu_alloc_visitors_ GUARDED_BY(mu_); - std::vector cpu_free_visitors_ GUARDED_BY(mu_); + std::vector cpu_allocators_ GUARDED_BY(mu_); virtual ~ProcessState(); diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index 9d59264899..103eee03b3 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -72,10 +72,9 @@ class RenamedDevice : public Device { return underlying_->MakeGpuDevice(); } - Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, - Allocator* allocator) override { - return underlying_->ReinitializeGpuDevice(context, device, dc, allocator); + void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, Allocator* allocator) override { + underlying_->ReinitializeGpuDevice(context, device, dc, allocator); } Status MakeTensorFromProto(const TensorProto& tensor_proto, diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/common_runtime/visitable_allocator.h new file mode 100644 index 0000000000..ae0563a96a --- /dev/null +++ b/tensorflow/core/common_runtime/visitable_allocator.h @@ -0,0 +1,79 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ + +#include +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/tracking_allocator.h" + +namespace tensorflow { + +// Subclass VisitableAllocator instead of Allocator when a memory +// allocator needs to enable some kind of registration/deregistration +// of memory areas. +class VisitableAllocator : public Allocator { + public: + // Visitor gets called with a pointer to a memory area and its + // size in bytes. + typedef std::function Visitor; + + // Register a visitor guaranteed to be called exactly once on each + // chunk of memory newly allocated from the underlying device. + // Typically, chunks will be reused and possibly sub-divided by a + // pool manager, so the calls will happen only once per process + // execution, not once per tensor (re)allocation. + virtual void AddAllocVisitor(Visitor visitor) = 0; + + // Register a visitor guaranteed to be called on each chunk of + // memory returned to the underlying device. + virtual void AddFreeVisitor(Visitor visitor) = 0; +}; + +// Needed for cases when a VisitableAllocator gets wrapped for tracking. 
+// Multiple-inheritance is considered acceptable in this case because +// VisitableAllocator is a pure virtual interface and only TrackingAllocator +// has default implementation. +class TrackingVisitableAllocator : public TrackingAllocator, + public VisitableAllocator { + public: + TrackingVisitableAllocator(VisitableAllocator* allocator, bool track_ids) + : TrackingAllocator(allocator, track_ids), allocator_(allocator) {} + ~TrackingVisitableAllocator() override {} + + string Name() override { return TrackingAllocator::Name(); } + + void* AllocateRaw(size_t alignment, size_t num_bytes) override { + return TrackingAllocator::AllocateRaw(alignment, num_bytes); + } + + void DeallocateRaw(void* ptr) override { + TrackingAllocator::DeallocateRaw(ptr); + } + + void AddAllocVisitor(Visitor visitor) override { + allocator_->AddAllocVisitor(visitor); + } + + void AddFreeVisitor(Visitor visitor) override { + allocator_->AddFreeVisitor(visitor); + } + + protected: + VisitableAllocator* allocator_; +}; +} // namespace tensorflow +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index 84cee5569c..2a7ee16a16 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -196,7 +196,7 @@ class CPUAllocatorFactory : public AllocatorFactory { class CPUSubAllocator : public SubAllocator { public: explicit CPUSubAllocator(CPUAllocator* cpu_allocator) - : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {} + : cpu_allocator_(cpu_allocator) {} void* Alloc(size_t alignment, size_t num_bytes) override { return cpu_allocator_->AllocateRaw(alignment, num_bytes); @@ -222,22 +222,4 @@ Allocator* cpu_allocator() { } return cpu_alloc; } - -SubAllocator::SubAllocator(const std::vector& alloc_visitors, - const std::vector& free_visitors) - : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {} - -void SubAllocator::VisitAlloc(void* 
ptr, int index, size_t num_bytes) { - for (const auto& v : alloc_visitors_) { - v(ptr, index, num_bytes); - } -} - -void SubAllocator::VisitFree(void* ptr, int index, size_t num_bytes) { - // Although we don't guarantee any order of visitor application, strive - // to apply free visitors in reverse order of alloc visitors. - for (int i = free_visitors_.size() - 1; i >= 0; --i) { - free_visitors_[i](ptr, index, num_bytes); - } -} } // namespace tensorflow diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 8c23604625..ded120b704 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -388,36 +387,13 @@ void EnableCPUAllocatorStats(bool enable); // full statistics. By default, it's disabled. void EnableCPUAllocatorFullStats(bool enable); -// An object that does the underlying suballoc/free of memory for a higher-level -// allocator. The expectation is that the higher-level allocator is doing some -// kind of cache or pool management so that it will call SubAllocator::Alloc and -// Free relatively infrequently, compared to the number of times its own -// AllocateRaw and Free methods are called. +// Abstract interface of an object that does the underlying suballoc/free of +// memory for a higher-level allocator. class SubAllocator { public: - // Visitor gets called with a pointer to a memory area and its - // size in bytes. The index value will be numa_node for a CPU - // allocator and GPU id for a GPU allocator. 
- typedef std::function Visitor; - - SubAllocator(const std::vector& alloc_visitors, - const std::vector& free_visitors); - virtual ~SubAllocator() {} virtual void* Alloc(size_t alignment, size_t num_bytes) = 0; virtual void Free(void* ptr, size_t num_bytes) = 0; - - protected: - // Implementation of Alloc() method must call this on newly allocated - // value. - void VisitAlloc(void* ptr, int index, size_t num_bytes); - - // Implementation of Free() method must call this on value to be - // freed immediately before deallocation. - void VisitFree(void* ptr, int index, size_t num_bytes); - - const std::vector alloc_visitors_; - const std::vector free_visitors_; }; } // namespace tensorflow diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 53ac639b4c..794250a2c1 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -214,12 +214,10 @@ class DeviceBase { // This is overridden by GPU devices to reinitialize the derived // type returned by MakeGpuDevice. 
- virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/, - PerOpGpuDevice* /*device*/, - DeviceContext* /*dc*/, - Allocator* /*allocator*/) { - return Status::OK(); - } + virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/, + PerOpGpuDevice* /*device*/, + DeviceContext* /*dc*/, + Allocator* /*allocator*/) {} // Unimplemented by default virtual const DeviceAttributes& attributes() const; diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 3e34bf0418..80f2b12987 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -265,12 +265,9 @@ OpKernelContext::OpKernelContext(Params* params, int num_outputs) params_->ensure_eigen_gpu_device(); if (params_->eigen_gpu_device != nullptr) { Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes()); - Status s = params_->device->ReinitializeGpuDevice( - this, params_->eigen_gpu_device, params_->op_device_context, - eigen_gpu_allocator); - if (!s.ok()) { - SetStatus(s); - } + params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device, + params_->op_device_context, + eigen_gpu_allocator); } if (params_->record_tensor_accesses) { referenced_tensors_.Init(); -- GitLab From 5d51afdfec8c6a96d48457d4678e2835100577a6 Mon Sep 17 00:00:00 2001 From: Bairen Yi Date: Tue, 18 Sep 2018 13:17:07 +0800 Subject: [PATCH 0319/1357] Support scoped_allocator_ops for renamed device. This fixes #22274. 
Signed-off-by: Bairen Yi --- tensorflow/core/common_runtime/renamed_device.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index 103eee03b3..caf1300d85 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -58,6 +58,15 @@ class RenamedDevice : public Device { return underlying_->GetAllocator(attr); } + Allocator* GetScopedAllocator(AllocatorAttributes attr, + int64 step_id) override { + return underlying_->GetScopedAllocator(attr, step_id); + } + + ScopedAllocatorMgr* GetScopedAllocatorMgr() const override { + return underlying_->GetScopedAllocatorMgr(); + } + const Eigen::ThreadPoolDevice* eigen_cpu_device() override { return underlying_->eigen_cpu_device(); } -- GitLab From 9cc7bbe5b476bec556d7dce235996a03775d7492 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 17 Sep 2018 23:09:48 -0700 Subject: [PATCH 0320/1357] [XLA] Refactor conv_ops emitters to make them reusable. 
PiperOrigin-RevId: 213398930 --- tensorflow/compiler/tf2xla/kernels/BUILD | 22 + .../tf2xla/kernels/conv_op_helpers.cc | 509 ++++++++++++++++ .../compiler/tf2xla/kernels/conv_op_helpers.h | 69 +++ .../compiler/tf2xla/kernels/conv_ops.cc | 551 ++---------------- tensorflow/compiler/tf2xla/shape_util.cc | 14 +- tensorflow/compiler/tf2xla/shape_util.h | 5 + 6 files changed, 661 insertions(+), 509 deletions(-) create mode 100644 tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc create mode 100644 tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 46794f7b50..3e823254d3 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -113,6 +113,7 @@ tf_kernel_library( "shape_util.h", ], deps = [ + ":conv_op_helpers", ":if_op", ":while_op", "//tensorflow/compiler/tf2xla:common", @@ -172,6 +173,27 @@ tf_kernel_library( ], ) +cc_library( + name = "conv_op_helpers", + srcs = ["conv_op_helpers.cc"], + hdrs = ["conv_op_helpers.h"], + deps = [ + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/lib:constants", + "//tensorflow/compiler/xla/client/lib:numeric", + "//tensorflow/core:framework", + "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:ops_util", + "@com_google_absl//absl/types:span", + ], +) + tf_kernel_library( name = "while_op", srcs = ["while_op.cc"], diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc new file mode 100644 index 0000000000..c9a1be4940 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc @@ -0,0 
+1,509 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// XLA-specific Ops for 2D convolution. + +#include "tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/lib/constants.h" +#include "tensorflow/compiler/xla/client/lib/numeric.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_slice.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/conv_grad_ops.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace tensorflow { +namespace { + +// Returns the expanded 
size of a filter used for depthwise convolution. +// If `shape` is [H, W, ..., M, N] returns [H, W, ..., M, M*N]. +xla::Shape ExpandedFilterShapeForDepthwiseConvolution(const xla::Shape& shape) { + int num_dims = shape.dimensions_size(); + CHECK_GE(num_dims, 2); // Crash OK + xla::Shape expanded_shape = shape; + expanded_shape.set_dimensions( + num_dims - 1, + shape.dimensions(num_dims - 2) * shape.dimensions(num_dims - 1)); + return expanded_shape; +} + +// Create a mask for depthwise convolution that will make a normal convolution +// produce the same results as a depthwise convolution. For a [2, 2, 3, 2] +// depthwise filter this returns a [2, 2, 3, 6] tensor +// 1 1 0 0 0 0 1 1 0 0 0 0 +// 0 0 1 1 0 0 0 0 1 1 0 0 +// 0 0 0 0 1 1 0 0 0 0 1 1 +// +// 1 1 0 0 0 0 1 1 0 0 0 0 +// 0 0 1 1 0 0 0 0 1 1 0 0 +// 0 0 0 0 1 1 0 0 0 0 1 1 +// +// The first step is to create a one tensor, A, that is [3] +// 0 1 2 +// +// and another tensor, B, that is [3 * 2] +// 0 1 2 3 4 5 +// +// and divide B it by 2 to get +// 0 0 1 1 2 2 +// +// then we broadcast the B to [2, 2, 3, 3 * 2] +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// 0 0 1 1 2 2 0 0 1 1 2 2 +// +// Finally compare A and broadcasted B in dimension 2 amd return the result at +// the beginning of the comment. +xla::XlaOp CreateExpandedFilterMask(const xla::Shape& filter_shape, + xla::XlaBuilder* builder) { + xla::Shape expanded_filter_shape = + ExpandedFilterShapeForDepthwiseConvolution(filter_shape); + int64 depthwise_multiplier = + filter_shape.dimensions(filter_shape.dimensions_size() - 1); + int64 input_feature = + filter_shape.dimensions(filter_shape.dimensions_size() - 2); + + // Create a M sized linspace and an M*N sized linspace that will be + // broadcasted into perpendicular dimensions and compared. 
+ xla::XlaOp input_feature_iota = xla::Iota(builder, xla::S32, input_feature); + xla::XlaOp expanded_feature_iota = + xla::Iota(builder, xla::S32, input_feature * depthwise_multiplier); + + // Divide the M*N sized linspace by the depthwise_multiplier to create + // [0 0 1 1 2 2] in the example in the function comment. + expanded_feature_iota = + xla::Div(expanded_feature_iota, + XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32, + depthwise_multiplier)); + + // Broadcast the N*M linspace to [H, W, ..., M, M*N]. + std::vector expanded_feature_broadcast_dims( + expanded_filter_shape.dimensions().begin(), + expanded_filter_shape.dimensions().end()); + expanded_feature_broadcast_dims.pop_back(); + auto broadcasted_expanded_feature_iota = + xla::Broadcast(expanded_feature_iota, expanded_feature_broadcast_dims); + + // Compare the broadcasted linspace to the input feature linspace in the + // input feature dimension to create a diagonal predicate. + return xla::Eq(broadcasted_expanded_feature_iota, input_feature_iota, + {expanded_filter_shape.dimensions_size() - 2}); +} + +// Reshapes a filter of shape [H, W, ..., M, N] to [H, W, ..., 1, M*N]. Used to +// build a depthwise convolution. +xla::XlaOp ReshapeFilterForDepthwiseConvolution(const xla::Shape& filter_shape, + const xla::XlaOp& filter) { + int64 input_feature_dim = filter_shape.dimensions_size() - 2; + int64 output_feature_dim = filter_shape.dimensions_size() - 1; + int64 depthwise_multiplier = filter_shape.dimensions(output_feature_dim); + int64 input_feature = filter_shape.dimensions(input_feature_dim); + + // Create a [H, W, ..., 1, N*M] reshape of the filter. 
+ xla::Shape implicit_broadcast_filter_shape = filter_shape; + implicit_broadcast_filter_shape.set_dimensions(input_feature_dim, 1); + implicit_broadcast_filter_shape.set_dimensions( + output_feature_dim, depthwise_multiplier * input_feature); + return xla::Reshape( + filter, xla::AsInt64Slice(implicit_broadcast_filter_shape.dimensions())); +} + +// Reduces the results of the convolution with an expanded filter to the +// non-expanded filter. +xla::XlaOp ContractFilterForDepthwiseBackprop(const xla::Shape& filter_shape, + const xla::XlaOp& filter_backprop, + xla::XlaBuilder* builder) { + auto masked_expanded_filter = + xla::Select(CreateExpandedFilterMask(filter_shape, builder), + filter_backprop, xla::ZerosLike(filter_backprop)); + + auto elem_type = filter_shape.element_type(); + return xla::Reshape( + // This reduce does not need inputs to be converted with + // XlaHelpers::SumAccumulationType() since the select above guarantees + // that only one element is non zero, so there cannot be accumulated + // precision error. + xla::Reduce(masked_expanded_filter, xla::Zero(builder, elem_type), + CreateScalarAddComputation(elem_type, builder), + {filter_shape.dimensions_size() - 2}), + xla::AsInt64Slice(filter_shape.dimensions())); +} + +// Performs some basic checks on ConvOpAttrs that are true for all kinds of XLA +// convolutions (as currently implemented). 
+Status CheckConvAttrs(const ConvOpAttrs& attrs) { + const int num_dims = attrs.num_spatial_dims + 2; + if (attrs.strides.size() != num_dims) { + return errors::InvalidArgument("Sliding window strides field must specify ", + num_dims, " dimensions"); + } + int batch_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format); + int feature_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format); + if (attrs.strides[batch_dim] != 1 || attrs.strides[feature_dim] != 1) { + return errors::Unimplemented( + "Current implementation does not yet support strides in the batch and " + "depth dimensions."); + } + if (attrs.dilations.size() != num_dims) { + return errors::InvalidArgument("Dilations field must specify ", num_dims, + " dimensions"); + } + if (attrs.dilations[batch_dim] != 1 || attrs.dilations[feature_dim] != 1) { + return errors::Unimplemented( + "Current implementation does not support dilations in the batch and " + "depth dimensions."); + } + for (int i = 0; i < attrs.num_spatial_dims; ++i) { + int input_dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i); + if (attrs.dilations[input_dim] < 1) { + return errors::Unimplemented("Dilation values must be positive; ", i, + "th spatial dimension had dilation ", + attrs.dilations[input_dim]); + } + } + return Status::OK(); +} + +// Wrapper around ConvBackpropComputeDimensions that converts from XLA shapes +// to TensorShapes. 
+Status ConvBackpropComputeDimensionsV2XlaShapes( + StringPiece label, int num_spatial_dims, const xla::Shape& input_shape, + const xla::Shape& filter_shape, const xla::Shape& out_backprop_shape, + absl::Span dilations, const std::vector& strides, + Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) { + TensorShape input_tensor_shape, filter_tensor_shape, + out_backprop_tensor_shape; + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(input_shape, &input_tensor_shape)); + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(filter_shape, &filter_tensor_shape)); + TF_RETURN_IF_ERROR( + XLAShapeToTensorShape(out_backprop_shape, &out_backprop_tensor_shape)); + return ConvBackpropComputeDimensionsV2( + label, num_spatial_dims, input_tensor_shape, filter_tensor_shape, + out_backprop_tensor_shape, dilations, strides, padding, data_format, + dims); +} + +} // anonymous namespace + +xla::StatusOr ConvOpAttrs::Create(int num_spatial_dims, + bool depthwise, + OpKernelConstruction* ctx) { + ConvOpAttrs attrs; + attrs.num_spatial_dims = num_spatial_dims; + attrs.depthwise = depthwise; + TF_RETURN_IF_ERROR(ctx->GetAttr("dilations", &attrs.dilations)); + TF_RETURN_IF_ERROR(ctx->GetAttr("strides", &attrs.strides)); + TF_RETURN_IF_ERROR(ctx->GetAttr("padding", &attrs.padding)); + + string data_format; + TF_RETURN_IF_ERROR(ctx->GetAttr("data_format", &data_format)); + if (!FormatFromString(data_format, &attrs.data_format)) { + return errors::InvalidArgument("Invalid data format: ", data_format); + } + + return attrs; +} + +xla::StatusOr MakeXlaForwardConvOp(StringPiece /*type_string*/, + xla::XlaOp conv_input, + xla::XlaOp filter, + const ConvOpAttrs& attrs) { + TF_RETURN_IF_ERROR(CheckConvAttrs(attrs)); + + auto* builder = conv_input.builder(); + TF_ASSIGN_OR_RETURN(xla::Shape input_shape, builder->GetShape(conv_input)); + // Filter has the form [filter_rows, filter_cols, ..., in_depth, out_depth] + TF_ASSIGN_OR_RETURN(xla::Shape filter_shape, builder->GetShape(filter)); + + // 
For 2D convolution, there should be 4 dimensions. + int num_dims = attrs.num_spatial_dims + 2; + if (input_shape.dimensions_size() != num_dims) { + return errors::InvalidArgument("input must be ", num_dims, "-dimensional", + input_shape.DebugString()); + } + if (filter_shape.dimensions_size() != num_dims) { + return errors::InvalidArgument( + "filter must be ", num_dims, + "-dimensional: ", filter_shape.DebugString()); + } + + // The last two dimensions of the filter are the input and output shapes. + int batch_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format); + int feature_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format); + + int64 in_depth = filter_shape.dimensions(attrs.num_spatial_dims); + // The 'C' dimension for input is in_depth. It must be the same as + // the filter's in_depth. + if (in_depth != input_shape.dimensions(feature_dim)) { + return errors::InvalidArgument( + "input and filter must have the same depth: ", in_depth, " vs ", + input_shape.dimensions(feature_dim)); + } + + if (attrs.depthwise) { + filter = ReshapeFilterForDepthwiseConvolution(filter_shape, filter); + } + + xla::ConvolutionDimensionNumbers dims; + std::vector window_strides(attrs.num_spatial_dims); + std::vector lhs_dilation(attrs.num_spatial_dims, 1); + std::vector rhs_dilation(attrs.num_spatial_dims); + std::vector> padding(attrs.num_spatial_dims); + + dims.set_input_batch_dimension(batch_dim); + dims.set_output_batch_dimension(batch_dim); + dims.set_input_feature_dimension(feature_dim); + dims.set_output_feature_dimension(feature_dim); + dims.set_kernel_input_feature_dimension(attrs.num_spatial_dims); + dims.set_kernel_output_feature_dimension(attrs.num_spatial_dims + 1); + + for (int i = 0; i < attrs.num_spatial_dims; ++i) { + const int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i); + dims.add_input_spatial_dimensions(dim); + dims.add_kernel_spatial_dimensions(i); + dims.add_output_spatial_dimensions(dim); + window_strides[i] = 
attrs.strides.at(dim); + rhs_dilation[i] = attrs.dilations.at(dim); + + int64 unused_output_size; + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerboseV2( + input_shape.dimensions(dim), filter_shape.dimensions(i), + rhs_dilation[i], window_strides[i], attrs.padding, &unused_output_size, + &padding[i].first, &padding[i].second)); + } + + return xla::ConvGeneralDilated( + conv_input, filter, window_strides, padding, lhs_dilation, rhs_dilation, + dims, /*feature_group_count=*/attrs.depthwise ? in_depth : 1); +} + +xla::StatusOr MakeXlaBackpropInputConvOp( + StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter, + xla::XlaOp out_backprop, const ConvOpAttrs& attrs) { + TF_RETURN_IF_ERROR(CheckConvAttrs(attrs)); + + int num_dims = attrs.num_spatial_dims + 2; + int batch_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format); + int feature_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format); + + auto* builder = filter.builder(); + TF_ASSIGN_OR_RETURN(xla::Shape filter_shape, builder->GetShape(filter)); + TF_ASSIGN_OR_RETURN(xla::Shape out_backprop_shape, + builder->GetShape(out_backprop)); + + xla::Shape expanded_filter_shape = + attrs.depthwise ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape) + : filter_shape; + // Reuse dimension computation logic from conv_grad_ops.cc. + ConvBackpropDimensions dims; + TF_RETURN_IF_ERROR(ConvBackpropComputeDimensionsV2XlaShapes( + type_string, attrs.num_spatial_dims, input_shape, expanded_filter_shape, + out_backprop_shape, attrs.dilations, attrs.strides, attrs.padding, + attrs.data_format, &dims)); + + // The input gradients are computed by a convolution of the output + // gradients and the filter, with some appropriate padding. See the + // comment at the top of conv_grad_ops.h for details. 
+ + xla::ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(batch_dim); + dnums.set_output_batch_dimension(batch_dim); + dnums.set_input_feature_dimension(feature_dim); + dnums.set_output_feature_dimension(feature_dim); + + // TF filter shape is [ H, W, ..., inC, outC ] + // Transpose the input and output features for computing the gradient. + dnums.set_kernel_input_feature_dimension(attrs.num_spatial_dims + 1); + dnums.set_kernel_output_feature_dimension(attrs.num_spatial_dims); + + std::vector kernel_spatial_dims(attrs.num_spatial_dims); + std::vector> padding(attrs.num_spatial_dims); + std::vector lhs_dilation(attrs.num_spatial_dims); + std::vector rhs_dilation(attrs.num_spatial_dims); + std::vector ones(attrs.num_spatial_dims, 1); + for (int i = 0; i < attrs.num_spatial_dims; ++i) { + int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i); + dnums.add_input_spatial_dimensions(dim); + dnums.add_kernel_spatial_dimensions(i); + dnums.add_output_spatial_dimensions(dim); + + kernel_spatial_dims[i] = i; + padding[i] = {dims.spatial_dims[i].pad_before, + dims.spatial_dims[i].pad_after}; + lhs_dilation[i] = dims.spatial_dims[i].stride; + rhs_dilation[i] = attrs.dilations[dim]; + } + + // Mirror the filter in the spatial dimensions. + xla::XlaOp mirrored_weights = xla::Rev(filter, kernel_spatial_dims); + + // activation gradients + // = gradients (with padding and dilation) mirrored_weights + return xla::ConvGeneralDilated( + out_backprop, mirrored_weights, /*window_strides=*/ones, padding, + lhs_dilation, rhs_dilation, dnums, + /*feature_group_count=*/ + attrs.depthwise ? 
out_backprop_shape.dimensions(feature_dim) / + filter_shape.dimensions(attrs.num_spatial_dims + 1) + : 1); +} + +xla::StatusOr MakeXlaBackpropFilterConvOp( + StringPiece type_string, xla::XlaOp activations, + const xla::Shape& filter_shape, xla::XlaOp gradients, + const ConvOpAttrs& attrs) { + TF_RETURN_IF_ERROR(CheckConvAttrs(attrs)); + + auto* builder = activations.builder(); + TF_ASSIGN_OR_RETURN(xla::Shape activations_shape, + builder->GetShape(activations)); + TF_ASSIGN_OR_RETURN(xla::Shape out_backprop_shape, + builder->GetShape(gradients)); + const xla::Shape expanded_filter_shape = + attrs.depthwise ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape) + : filter_shape; + + // Reuse dimension computation logic from conv_grad_ops.cc. + ConvBackpropDimensions dims; + TF_RETURN_IF_ERROR(ConvBackpropComputeDimensionsV2XlaShapes( + type_string, attrs.num_spatial_dims, activations_shape, + expanded_filter_shape, out_backprop_shape, attrs.dilations, attrs.strides, + attrs.padding, attrs.data_format, &dims)); + + // The filter gradients are computed by a convolution of the input + // activations and the output gradients, with some appropriate padding. + // See the comment at the top of conv_grad_ops.h for details. + + xla::ConvolutionDimensionNumbers dnums; + + // The activations (inputs) form the LHS of the convolution. + // Activations have shape: [batch, in_rows, in_cols, ..., in_depth] + // For the gradient computation, we flip the roles of the batch and + // feature dimensions. + // Each spatial entry has size in_depth * batch + + // The last two dimensions of the filter are the input and output shapes. + int num_dims = attrs.num_spatial_dims + 2; + int n_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format); + int c_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format); + + // Swap n_dim and c_dim in the activations. 
+ dnums.set_input_batch_dimension(c_dim); + dnums.set_input_feature_dimension(n_dim); + + // The gradients become the RHS of the convolution. + // The gradients have shape [batch, out_rows, out_cols, ..., out_depth] + // where the batch becomes the input feature for the convolution. + dnums.set_kernel_input_feature_dimension(n_dim); + dnums.set_kernel_output_feature_dimension(c_dim); + + std::vector> padding(attrs.num_spatial_dims); + std::vector rhs_dilation(attrs.num_spatial_dims); + std::vector window_strides(attrs.num_spatial_dims); + std::vector ones(attrs.num_spatial_dims, 1); + + // Tensorflow filter shape is [ H, W, ..., inC, outC ]. + for (int i = 0; i < attrs.num_spatial_dims; ++i) { + dnums.add_output_spatial_dimensions(i); + } + dnums.set_output_batch_dimension(attrs.num_spatial_dims); + dnums.set_output_feature_dimension(attrs.num_spatial_dims + 1); + + for (int i = 0; i < attrs.num_spatial_dims; ++i) { + int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i); + dnums.add_input_spatial_dimensions(dim); + dnums.add_kernel_spatial_dimensions(dim); + + // We will also need to pad the input with zeros such that after the + // convolution, we get the right size for the filter. + // The padded_in_rows should be such that when we convolve this with the + // expanded_out_rows as a filter, we should get filter_rows back. + // + const int64 padded_in_size = + dims.spatial_dims[i].expanded_output_size + + (dims.spatial_dims[i].filter_size - 1) * attrs.dilations[dim]; + + // However it can be smaller than input_rows: in this + // case it means some of the inputs are not used. + // + // An example is to have input_cols = 3, filter_cols = 2 and stride = 2: + // + // INPUT = [ A B C ] + // + // FILTER = [ x y ] + // + // and the output will only have one column: a = A * x + B * y + // + // and input "C" is not used at all. + // + // We apply negative padding in this case. 
+ const int64 pad_total = padded_in_size - dims.spatial_dims[i].input_size; + + // + For the VALID padding, we don't pad anything on the top/left side + // and pad the bottom/right side with the remaining space. + // + For the SAME padding, we pad top/left side the same as bottom/right + // side. + // + // In addition, if the padded input size is smaller than the input size, + // we need to ignore some training elements of the input. We do this by + // applying negative padding on the right/bottom. + const int64 pad_before = + attrs.padding == Padding::SAME ? std::max(pad_total / 2, 0) : 0; + + padding[i] = {pad_before, pad_total - pad_before}; + rhs_dilation[i] = dims.spatial_dims[i].stride; + window_strides[i] = attrs.dilations[dim]; + } + + // Besides padding the input, we will also expand output_rows to + // expanded_out_rows = (output_rows - 1) * stride + 1 + // with zeros in between: + // + // a . . . b . . . c . . . d . . . e + // + // This is done by specifying the window dilation factors in the + // convolution HLO below. + auto filter_backprop = + xla::ConvGeneralDilated(activations, gradients, window_strides, padding, + /*lhs_dilation=*/ones, rhs_dilation, dnums); + + if (attrs.depthwise) { + filter_backprop = ContractFilterForDepthwiseBackprop( + filter_shape, filter_backprop, activations.builder()); + } + + return filter_backprop; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h new file mode 100644 index 0000000000..6e1b70a478 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h @@ -0,0 +1,69 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_KERNELS_CONV_OP_HELPERS_H_ +#define TENSORFLOW_COMPILER_TF2XLA_KERNELS_CONV_OP_HELPERS_H_ + +#include + +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/util/tensor_format.h" + +// This header exposes utilities for translating TensorFlow convolution ops into +// XLA ops. +// +// conv_ops.cc contains lowerings for many of these TF convolution ops (e.g. +// Conv2D, Conv3DBackpropFilterV2), but you might want to use the utilities in +// this header to implement a new and exciting convolution op, for example a +// fused TensorFlow op that contains a convolution and other things. + +namespace tensorflow { + +// ConvOpAttrs contains all of the metadata necessary to specify a TF or XLA +// convolution. +struct ConvOpAttrs { + // Constructs a ConvOpAttrs, reading most of the attributes from `ctx`. + static xla::StatusOr Create(int num_spatial_dims, bool depthwise, + OpKernelConstruction* ctx); + + bool depthwise; + int num_spatial_dims; + std::vector dilations; + std::vector strides; + Padding padding; + TensorFormat data_format; +}; + +// Creates a new XLA forward or backward convolution with the given inputs and +// attributes. 
+xla::StatusOr MakeXlaForwardConvOp(StringPiece type_string, + xla::XlaOp conv_input, + xla::XlaOp filter, + const ConvOpAttrs& attrs); +xla::StatusOr MakeXlaBackpropInputConvOp( + StringPiece type_string, const xla::Shape& input_shape, xla::XlaOp filter, + xla::XlaOp out_backprop, const ConvOpAttrs& attrs); +xla::StatusOr MakeXlaBackpropFilterConvOp( + StringPiece type_string, xla::XlaOp activations, + const xla::Shape& filter_shape, xla::XlaOp gradients, + const ConvOpAttrs& attrs); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_KERNELS_CONV_OP_HELPERS_H_ diff --git a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc index 674720e22f..cd7c820be0 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_ops.cc @@ -15,12 +15,17 @@ limitations under the License. // XLA-specific Ops for 2D convolution. +#include "tensorflow/compiler/tf2xla/kernels/conv_op_helpers.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/constants.h" #include "tensorflow/compiler/xla/client/lib/numeric.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" @@ -33,250 +38,28 @@ limitations under the License. #include "tensorflow/core/util/tensor_format.h" namespace tensorflow { - namespace { -// Returns the expanded size of a filter used for depthwise convolution. -// If `shape` is [H, W, ..., M, N] returns [H, W, ..., M, M*N]. 
-TensorShape ExpandedFilterShapeForDepthwiseConvolution( - const TensorShape& shape) { - int num_dims = shape.dims(); - CHECK_GE(num_dims, 2); - TensorShape expanded_shape = shape; - expanded_shape.set_dim(num_dims - 1, shape.dim_size(num_dims - 2) * - shape.dim_size(num_dims - 1)); - return expanded_shape; -} - -// Broadcast zeros to ExpandedFilterShapeForDepthwiseConvolution. -xla::XlaOp CreateExpandedZero(const TensorShape& filter_shape, DataType dtype, - xla::XlaBuilder* builder) { - TensorShape expanded_filter_shape = - ExpandedFilterShapeForDepthwiseConvolution(filter_shape); - return xla::Broadcast(XlaHelpers::Zero(builder, dtype), - expanded_filter_shape.dim_sizes()); -} - -// Create a mask for depthwise convolution that will make a normal convolution -// produce the same results as a depthwise convolution. For a [2, 2, 3, 2] -// depthwise filter this returns a [2, 2, 3, 6] tensor -// 1 1 0 0 0 0 1 1 0 0 0 0 -// 0 0 1 1 0 0 0 0 1 1 0 0 -// 0 0 0 0 1 1 0 0 0 0 1 1 -// -// 1 1 0 0 0 0 1 1 0 0 0 0 -// 0 0 1 1 0 0 0 0 1 1 0 0 -// 0 0 0 0 1 1 0 0 0 0 1 1 -// -// The first step is to create a one tensor, A, that is [3] -// 0 1 2 -// -// and another tensor, B, that is [3 * 2] -// 0 1 2 3 4 5 -// -// and divide B it by 2 to get -// 0 0 1 1 2 2 -// -// then we broadcast the B to [2, 2, 3, 3 * 2] -// 0 0 1 1 2 2 0 0 1 1 2 2 -// 0 0 1 1 2 2 0 0 1 1 2 2 -// 0 0 1 1 2 2 0 0 1 1 2 2 -// -// 0 0 1 1 2 2 0 0 1 1 2 2 -// 0 0 1 1 2 2 0 0 1 1 2 2 -// 0 0 1 1 2 2 0 0 1 1 2 2 -// -// Finally compare A and broadcasted B in dimension 2 amd return the result at -// the beginning of the comment. 
-xla::XlaOp CreateExpandedFilterMask(const TensorShape& filter_shape, - xla::XlaBuilder* builder) { - TensorShape expanded_filter_shape = - ExpandedFilterShapeForDepthwiseConvolution(filter_shape); - int64 depthwise_multiplier = filter_shape.dim_size(filter_shape.dims() - 1); - int64 input_feature = filter_shape.dim_size(filter_shape.dims() - 2); - - // Create a M sized linspace and an M*N sized linspace that will be - // broadcasted into perpendicular dimensions and compared. - xla::XlaOp input_feature_iota = xla::Iota(builder, xla::S32, input_feature); - xla::XlaOp expanded_feature_iota = - xla::Iota(builder, xla::S32, input_feature * depthwise_multiplier); - - // Divide the M*N sized linspace by the depthwise_multiplier to create - // [0 0 1 1 2 2] in the example in the function comment. - expanded_feature_iota = - xla::Div(expanded_feature_iota, - XlaHelpers::IntegerLiteral(builder, DataType::DT_INT32, - depthwise_multiplier)); - - // Broadcast the N*M linspace to [H, W, ..., M, M*N]. - auto expanded_feature_broadcast_dims = expanded_filter_shape.dim_sizes(); - expanded_feature_broadcast_dims.pop_back(); - auto broadcasted_expanded_feature_iota = - xla::Broadcast(expanded_feature_iota, expanded_feature_broadcast_dims); - - // Compare the broadcasted linspace to the input feature linspace in the - // input feature dimension to create a diagonal predicate. - return xla::Eq(broadcasted_expanded_feature_iota, input_feature_iota, - {expanded_filter_shape.dims() - 2}); -} - -// Reshapes a filter of shape [H, W, ..., M, N] to [H, W, ..., 1, M*N]. Used to -// build a depthwise convolution. 
-xla::XlaOp ReshapeFilterForDepthwiseConvolution(const TensorShape& filter_shape, - const xla::XlaOp& filter) { - int64 input_feature_dim = filter_shape.dims() - 2; - int64 output_feature_dim = filter_shape.dims() - 1; - int64 depthwise_multiplier = filter_shape.dim_size(output_feature_dim); - int64 input_feature = filter_shape.dim_size(input_feature_dim); - - // Create a [H, W, ..., 1, N*M] reshape of the filter. - TensorShape implicit_broadcast_filter_shape = filter_shape; - implicit_broadcast_filter_shape.set_dim(input_feature_dim, 1); - implicit_broadcast_filter_shape.set_dim(output_feature_dim, - depthwise_multiplier * input_feature); - return xla::Reshape(filter, implicit_broadcast_filter_shape.dim_sizes()); -} - -// Reduces the results of the convolution with an expanded filter to the -// non-expanded filter. -xla::XlaOp ContractFilterForDepthwiseBackprop(XlaOpKernelContext* ctx, - const TensorShape& filter_shape, - DataType dtype, - const xla::XlaOp& filter_backprop, - xla::XlaBuilder* builder) { - auto masked_expanded_filter = xla::Select( - CreateExpandedFilterMask(filter_shape, builder), filter_backprop, - CreateExpandedZero(filter_shape, dtype, builder)); - return xla::Reshape( - // This reduce does not need inputs to be converted with - // XlaHelpers::SumAccumulationType() since the ExpandedFilterMask with - // ExpandedZero guarantees that only one element is non zero, so there - // cannot be accumulated precision error. 
- xla::Reduce(masked_expanded_filter, XlaHelpers::Zero(builder, dtype), - *ctx->GetOrCreateAdd(dtype), {filter_shape.dims() - 2}), - filter_shape.dim_sizes()); -} - class ConvOp : public XlaOpKernel { public: explicit ConvOp(OpKernelConstruction* ctx, int num_spatial_dims, bool depthwise) - : XlaOpKernel(ctx), - num_spatial_dims_(num_spatial_dims), - depthwise_(depthwise) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); - - string data_format; - OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format)); - OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_), - errors::InvalidArgument("Invalid data format")); + : XlaOpKernel(ctx) { + xla::StatusOr attrs = + ConvOpAttrs::Create(num_spatial_dims, depthwise, ctx); + OP_REQUIRES_OK(ctx, attrs.status()); + attrs_ = attrs.ValueOrDie(); } - int num_dims() const { return num_spatial_dims_ + 2; } - void Compile(XlaOpKernelContext* ctx) override { - OP_REQUIRES(ctx, strides_.size() == num_dims(), - errors::InvalidArgument("Sliding window strides field must " - "specify ", - num_dims(), " dimensions")); - int batch_dim = GetTensorBatchDimIndex(num_dims(), data_format_); - int feature_dim = GetTensorFeatureDimIndex(num_dims(), data_format_); - OP_REQUIRES( - ctx, strides_[batch_dim] == 1 && strides_[feature_dim] == 1, - errors::Unimplemented("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); - - OP_REQUIRES(ctx, dilations_.size() == num_dims(), - errors::InvalidArgument("Dilations field must " - "specify ", - num_dims(), " dimensions")); - OP_REQUIRES( - ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1, - errors::Unimplemented("Current implementation does not support " - "dilations in the batch and depth dimensions.")); - for (int i = 0; i < num_spatial_dims_; ++i) { - int input_dim = GetTensorSpatialDimIndex(num_dims(), 
data_format_, i); - OP_REQUIRES(ctx, dilations_[input_dim] >= 1, - errors::Unimplemented("Dilation values must be positive; ", i, - "th spatial dimension had dilation ", - dilations_[input_dim])); - } - - const TensorShape input_shape = ctx->InputShape(0); - // Input filter is of the following dimensions: - // [ filter_rows, filter_cols, ..., in_depth, out_depth] - const TensorShape filter_shape = ctx->InputShape(1); - - // For 2D convolution, there should be 4 dimensions. - OP_REQUIRES( - ctx, input_shape.dims() == num_dims(), - errors::InvalidArgument("input must be ", num_dims(), "-dimensional", - input_shape.DebugString())); - OP_REQUIRES( - ctx, filter_shape.dims() == num_dims(), - errors::InvalidArgument("filter must be ", num_dims(), - "-dimensional: ", filter_shape.DebugString())); - - // The last two dimension of the filter are the input and output shapes. - const int64 in_depth = filter_shape.dim_size(num_spatial_dims_); - - // The 'C' dimension for input is in_depth. It must be the same as - // the filter's in_depth. 
- OP_REQUIRES(ctx, in_depth == input_shape.dim_size(feature_dim), - errors::InvalidArgument( - "input and filter must have the same depth: ", in_depth, - " vs ", input_shape.dim_size(feature_dim))); - - xla::XlaOp filter = ctx->Input(1); - if (depthwise_) { - filter = ReshapeFilterForDepthwiseConvolution(filter_shape, filter); - } - - xla::ConvolutionDimensionNumbers dims; - std::vector window_strides(num_spatial_dims_); - std::vector lhs_dilation(num_spatial_dims_, 1); - std::vector rhs_dilation(num_spatial_dims_); - std::vector> padding(num_spatial_dims_); - - dims.set_input_batch_dimension(batch_dim); - dims.set_output_batch_dimension(batch_dim); - dims.set_input_feature_dimension(feature_dim); - dims.set_output_feature_dimension(feature_dim); - dims.set_kernel_input_feature_dimension(num_spatial_dims_); - dims.set_kernel_output_feature_dimension(num_spatial_dims_ + 1); - - for (int i = 0; i < num_spatial_dims_; ++i) { - const int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - dims.add_input_spatial_dimensions(dim); - dims.add_kernel_spatial_dimensions(i); - dims.add_output_spatial_dimensions(dim); - window_strides[i] = strides_.at(dim); - rhs_dilation[i] = dilations_.at(dim); - - int64 unused_output_size; - OP_REQUIRES_OK( - ctx, GetWindowedOutputSizeVerboseV2( - input_shape.dim_size(dim), filter_shape.dim_size(i), - rhs_dilation[i], window_strides[i], padding_, - &unused_output_size, &padding[i].first, &padding[i].second)); - } - - xla::XlaOp conv = xla::ConvGeneralDilated( - ctx->Input(0), filter, window_strides, padding, lhs_dilation, - rhs_dilation, dims, - /*feature_group_count=*/depthwise_ ? 
in_depth : 1); - ctx->SetOutput(0, conv); + xla::StatusOr conv = MakeXlaForwardConvOp( + ctx->op_kernel().type_string(), ctx->Input(0), ctx->Input(1), attrs_); + OP_REQUIRES_OK(ctx, conv.status()); + ctx->SetOutput(0, conv.ValueOrDie()); } protected: - const int num_spatial_dims_; - const bool depthwise_; - std::vector dilations_; - std::vector strides_; - Padding padding_; - TensorFormat data_format_ = FORMAT_NHWC; + ConvOpAttrs attrs_; private: TF_DISALLOW_COPY_AND_ASSIGN(ConvOp); @@ -308,124 +91,28 @@ class ConvBackpropInputOp : public XlaOpKernel { public: explicit ConvBackpropInputOp(OpKernelConstruction* ctx, int num_spatial_dims, bool depthwise) - : XlaOpKernel(ctx), - num_spatial_dims_(num_spatial_dims), - depthwise_(depthwise) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); - string data_format; - OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format)); - OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_), - errors::InvalidArgument("Invalid data format")); + : XlaOpKernel(ctx) { + xla::StatusOr attrs = + ConvOpAttrs::Create(num_spatial_dims, depthwise, ctx); + OP_REQUIRES_OK(ctx, attrs.status()); + attrs_ = attrs.ValueOrDie(); } - int num_dims() const { return num_spatial_dims_ + 2; } - void Compile(XlaOpKernelContext* ctx) override { - OP_REQUIRES(ctx, strides_.size() == num_dims(), - errors::InvalidArgument("Sliding window strides field must " - "specify ", - num_dims(), " dimensions")); - int batch_dim = GetTensorBatchDimIndex(num_dims(), data_format_); - int feature_dim = GetTensorFeatureDimIndex(num_dims(), data_format_); - OP_REQUIRES( - ctx, strides_[batch_dim] == 1 && strides_[feature_dim] == 1, - errors::Unimplemented("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); - - OP_REQUIRES(ctx, dilations_.size() == num_dims(), - 
errors::InvalidArgument("Dilations field must " - "specify ", - num_dims(), " dimensions")); - OP_REQUIRES( - ctx, dilations_[batch_dim] == 1 && dilations_[feature_dim] == 1, - errors::Unimplemented("Current implementation does not support " - "dilations in the batch and depth dimensions.")); - for (int i = 0; i < num_spatial_dims_; ++i) { - int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - OP_REQUIRES(ctx, dilations_[input_dim] >= 1, - errors::Unimplemented("Dilation values must be positive; ", i, - "th spatial dimension had dilation ", - dilations_[input_dim])); - } - - TensorShape input_shape; - OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_shape)); - - const TensorShape filter_shape = ctx->InputShape(1); - const TensorShape out_backprop_shape = ctx->InputShape(2); - - const TensorShape expanded_filter_shape = - depthwise_ ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape) - : filter_shape; - // Reuse dimension computation logic from conv_grad_ops.cc. - ConvBackpropDimensions dims; - OP_REQUIRES_OK(ctx, - ConvBackpropComputeDimensionsV2( - type_string(), num_spatial_dims_, input_shape, - expanded_filter_shape, out_backprop_shape, dilations_, - strides_, padding_, data_format_, &dims)); - - auto filter = ctx->Input(1); - auto out_backprop = ctx->Input(2); - - // The input gradients are computed by a convolution of the output - // gradients and the filter, with some appropriate padding. See the - // comment at the top of conv_grad_ops.h for details. - - xla::ConvolutionDimensionNumbers dnums; - dnums.set_input_batch_dimension(batch_dim); - dnums.set_output_batch_dimension(batch_dim); - dnums.set_input_feature_dimension(feature_dim); - dnums.set_output_feature_dimension(feature_dim); - - // TF filter shape is [ H, W, ..., inC, outC ] - // Transpose the input and output features for computing the gradient. 
- dnums.set_kernel_input_feature_dimension(num_spatial_dims_ + 1); - dnums.set_kernel_output_feature_dimension(num_spatial_dims_); - - std::vector kernel_spatial_dims(num_spatial_dims_); - std::vector> padding(num_spatial_dims_); - std::vector lhs_dilation(num_spatial_dims_); - std::vector rhs_dilation(num_spatial_dims_); - std::vector ones(num_spatial_dims_, 1); - for (int i = 0; i < num_spatial_dims_; ++i) { - int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - dnums.add_input_spatial_dimensions(dim); - dnums.add_kernel_spatial_dimensions(i); - dnums.add_output_spatial_dimensions(dim); - - kernel_spatial_dims[i] = i; - padding[i] = {dims.spatial_dims[i].pad_before, - dims.spatial_dims[i].pad_after}; - lhs_dilation[i] = dims.spatial_dims[i].stride; - rhs_dilation[i] = dilations_[dim]; - } - - // Mirror the filter in the spatial dimensions. - xla::XlaOp mirrored_weights = xla::Rev(filter, kernel_spatial_dims); - - // activation gradients - // = gradients (with padding and dilation) mirrored_weights - xla::XlaOp in_backprop = xla::ConvGeneralDilated( - out_backprop, mirrored_weights, /*window_strides=*/ones, padding, - lhs_dilation, rhs_dilation, dnums, - /*feature_group_count=*/ - depthwise_ ? 
out_backprop_shape.dim_size(feature_dim) / - filter_shape.dim_size(num_spatial_dims_ + 1) - : 1); - - ctx->SetOutput(0, in_backprop); + TensorShape input_tensor_shape; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &input_tensor_shape)); + xla::Shape input_shape = + TensorShapeToXLAShape(ctx->input_xla_type(1), input_tensor_shape); + + xla::StatusOr in_backprop = + MakeXlaBackpropInputConvOp(ctx->op_kernel().type_string(), input_shape, + ctx->Input(1), ctx->Input(2), attrs_); + OP_REQUIRES_OK(ctx, in_backprop.status()); + ctx->SetOutput(0, in_backprop.ValueOrDie()); } protected: - const int num_spatial_dims_; - const bool depthwise_; - std::vector dilations_; - std::vector strides_; - Padding padding_; - TensorFormat data_format_ = FORMAT_NHWC; + ConvOpAttrs attrs_; private: TF_DISALLOW_COPY_AND_ASSIGN(ConvBackpropInputOp); @@ -462,172 +149,28 @@ class ConvBackpropFilterOp : public XlaOpKernel { public: explicit ConvBackpropFilterOp(OpKernelConstruction* ctx, int num_spatial_dims, bool depthwise) - : XlaOpKernel(ctx), - num_spatial_dims_(num_spatial_dims), - depthwise_(depthwise) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("dilations", &dilations_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &strides_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); - string data_format; - OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format)); - OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_), - errors::InvalidArgument("Invalid data format")); + : XlaOpKernel(ctx) { + xla::StatusOr attrs = + ConvOpAttrs::Create(num_spatial_dims, depthwise, ctx); + OP_REQUIRES_OK(ctx, attrs.status()); + attrs_ = attrs.ValueOrDie(); } - int num_dims() const { return num_spatial_dims_ + 2; } - void Compile(XlaOpKernelContext* ctx) override { - const int n_dim = GetTensorBatchDimIndex(num_dims(), data_format_); - const int c_dim = GetTensorFeatureDimIndex(num_dims(), data_format_); - - OP_REQUIRES( - ctx, (strides_[n_dim] == 1 && strides_[c_dim] == 1), - 
errors::InvalidArgument("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); - - OP_REQUIRES(ctx, dilations_.size() == num_dims(), - errors::InvalidArgument("Dilations field must " - "specify ", - num_dims(), " dimensions")); - OP_REQUIRES( - ctx, dilations_[n_dim] == 1 && dilations_[c_dim] == 1, - errors::Unimplemented("Current implementation does not support " - "dilations in the batch and depth dimensions.")); - for (int i = 0; i < num_spatial_dims_; ++i) { - int input_dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - OP_REQUIRES(ctx, dilations_[input_dim] >= 1, - errors::Unimplemented("Dilation values must be positive; ", i, - "th spatial dimension had dilation ", - dilations_[input_dim])); - } - - const TensorShape activations_shape = ctx->InputShape(0); - TensorShape filter_shape; - OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_shape)); - const TensorShape out_backprop_shape = ctx->InputShape(2); - - const TensorShape expanded_filter_shape = - depthwise_ ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape) - : filter_shape; - - // Reuse dimension computation logic from conv_grad_ops.cc. - ConvBackpropDimensions dims; - OP_REQUIRES_OK(ctx, - ConvBackpropComputeDimensionsV2( - type_string(), num_spatial_dims_, activations_shape, - expanded_filter_shape, out_backprop_shape, dilations_, - strides_, padding_, data_format_, &dims)); - - xla::XlaBuilder* b = ctx->builder(); - xla::XlaOp activations = ctx->Input(0); - xla::XlaOp gradients = ctx->Input(2); - - // The filter gradients are computed by a convolution of the input - // activations and the output gradients, with some appropriate padding. - // See the comment at the top of conv_grad_ops.h for details. - - xla::ConvolutionDimensionNumbers dnums; - - // The activations (inputs) form the LHS of the convolution. 
- // Activations have shape: [batch, in_rows, in_cols, ..., in_depth] - // For the gradient computation, we flip the roles of the batch and - // feature dimensions. - // Each spatial entry has size in_depth * batch - - // Swap n_dim and c_dim in the activations. - dnums.set_input_batch_dimension(c_dim); - dnums.set_input_feature_dimension(n_dim); - - // The gradients become the RHS of the convolution. - // The gradients have shape [batch, out_rows, out_cols, ..., out_depth] - // where the batch becomes the input feature for the convolution. - dnums.set_kernel_input_feature_dimension(n_dim); - dnums.set_kernel_output_feature_dimension(c_dim); - - std::vector> padding(num_spatial_dims_); - std::vector rhs_dilation(num_spatial_dims_); - std::vector window_strides(num_spatial_dims_); - std::vector ones(num_spatial_dims_, 1); - - // Tensorflow filter shape is [ H, W, ..., inC, outC ]. - for (int i = 0; i < num_spatial_dims_; ++i) { - dnums.add_output_spatial_dimensions(i); - } - dnums.set_output_batch_dimension(num_spatial_dims_); - dnums.set_output_feature_dimension(num_spatial_dims_ + 1); - - for (int i = 0; i < num_spatial_dims_; ++i) { - int64 dim = GetTensorSpatialDimIndex(num_dims(), data_format_, i); - dnums.add_input_spatial_dimensions(dim); - dnums.add_kernel_spatial_dimensions(dim); - - // We will also need to pad the input with zeros such that after the - // convolution, we get the right size for the filter. - // The padded_in_rows should be such that when we convolve this with the - // expanded_out_rows as a filter, we should get filter_rows back. - // - const int64 padded_in_size = - dims.spatial_dims[i].expanded_output_size + - (dims.spatial_dims[i].filter_size - 1) * dilations_[dim]; - - // However it can be smaller than input_rows: in this - // case it means some of the inputs are not used. 
- // - // An example is to have input_cols = 3, filter_cols = 2 and stride = 2: - // - // INPUT = [ A B C ] - // - // FILTER = [ x y ] - // - // and the output will only have one column: a = A * x + B * y - // - // and input "C" is not used at all. - // - // We apply negative padding in this case. - const int64 pad_total = padded_in_size - dims.spatial_dims[i].input_size; - - // + For the VALID padding, we don't pad anything on the top/left side - // and pad the bottom/right side with the remaining space. - // + For the SAME padding, we pad top/left side the same as bottom/right - // side. - // - // In addition, if the padded input size is smaller than the input size, - // we need to ignore some training elements of the input. We do this by - // applying negative padding on the right/bottom. - const int64 pad_before = - padding_ == Padding::SAME ? std::max(pad_total / 2, 0) : 0; - - padding[i] = {pad_before, pad_total - pad_before}; - rhs_dilation[i] = dims.spatial_dims[i].stride; - window_strides[i] = dilations_[dim]; - } - - // Besides padding the input, we will also expand output_rows to - // expanded_out_rows = (output_rows - 1) * stride + 1 - // with zeros in between: - // - // a . . . b . . . c . . . d . . . e - // - // This is done by specifying the window dilation factors in the - // convolution HLO below. 
- auto filter_backprop = - xla::ConvGeneralDilated(activations, gradients, window_strides, padding, - /*lhs_dilation=*/ones, rhs_dilation, dnums); - - if (depthwise_) { - filter_backprop = ContractFilterForDepthwiseBackprop( - ctx, filter_shape, ctx->input_type(0), filter_backprop, b); - } - ctx->SetOutput(0, filter_backprop); + TensorShape filter_tensor_shape; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(1, &filter_tensor_shape)); + xla::Shape filter_shape = + TensorShapeToXLAShape(ctx->input_xla_type(0), filter_tensor_shape); + + xla::StatusOr filter_backprop = MakeXlaBackpropFilterConvOp( + ctx->op_kernel().type_string(), ctx->Input(0), filter_shape, + ctx->Input(2), attrs_); + OP_REQUIRES_OK(ctx, filter_backprop.status()); + ctx->SetOutput(0, filter_backprop.ValueOrDie()); } protected: - const int num_spatial_dims_; - const bool depthwise_; - std::vector dilations_; - std::vector strides_; - Padding padding_; - TensorFormat data_format_ = FORMAT_NHWC; + ConvOpAttrs attrs_; private: TF_DISALLOW_COPY_AND_ASSIGN(ConvBackpropFilterOp); diff --git a/tensorflow/compiler/tf2xla/shape_util.cc b/tensorflow/compiler/tf2xla/shape_util.cc index 9d1992205b..b589512dcd 100644 --- a/tensorflow/compiler/tf2xla/shape_util.cc +++ b/tensorflow/compiler/tf2xla/shape_util.cc @@ -41,6 +41,14 @@ Status XLAShapeToTensorShape(const xla::Shape& shape, // Convert a TensorShape into the equivalent XLA Shape proto. 
Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape, xla::Shape* shape) { + xla::PrimitiveType type; + TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(dtype, &type)); + *shape = TensorShapeToXLAShape(type, tensor_shape); + return Status::OK(); +} + +xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type, + const TensorShape& tensor_shape) { int rank = tensor_shape.dims(); std::vector dimensions(rank); std::vector layout(rank); @@ -50,11 +58,7 @@ Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape, // XLA uses minor-to-major; Tensorflow uses major-to-minor. std::iota(layout.rbegin(), layout.rend(), 0); - xla::PrimitiveType type; - TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(dtype, &type)); - - *shape = xla::ShapeUtil::MakeShapeWithLayout(type, dimensions, layout); - return Status::OK(); + return xla::ShapeUtil::MakeShapeWithLayout(type, dimensions, layout); } } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/shape_util.h b/tensorflow/compiler/tf2xla/shape_util.h index 58240b9c96..f7e34a5b40 100644 --- a/tensorflow/compiler/tf2xla/shape_util.h +++ b/tensorflow/compiler/tf2xla/shape_util.h @@ -35,6 +35,11 @@ Status XLAShapeToTensorShape(const xla::Shape& shape, Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape, xla::Shape* shape); +// Converts a TensorShape into the equivalent XLA Shape proto, taking an +// xla::PrimitiveType to specify the element type. This never fails. +xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type, + const TensorShape& tensor_shape); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_SHAPE_UTIL_H_ -- GitLab From 0cf3690400e46bd89b48a206eff8dd08a660aced Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 18 Sep 2018 02:04:31 -0700 Subject: [PATCH 0321/1357] compat: Update forward compatibility horizon to 2018-09-18 PiperOrigin-RevId: 213414462 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 0d2f2c9b9e..157e699604 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 17) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 18) @tf_export("compat.forward_compatible") -- GitLab From c6a060c83cc56c8c0cc0f1105550def4bff93c0d Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 18 Sep 2018 05:22:55 -0700 Subject: [PATCH 0322/1357] Simplify the interface of conversion_call to allow a ConversionOptions object that can be more easily extended. Currently any new argument needs changing a lot of call sites and there is redundancy in argument documentation. Note: this does not modify the public symbols yet - it's not clear whether we want to complicate their interface. However we may want to use it in to_graph and to_code. 
PiperOrigin-RevId: 213433379 --- tensorflow/python/autograph/__init__.py | 2 + .../python/autograph/converters/call_trees.py | 11 ++- .../autograph/core/converter_testing.py | 12 ++- tensorflow/python/autograph/impl/api.py | 83 +++++++++++++++---- tensorflow/python/autograph/impl/api_test.py | 24 +++--- .../python/autograph/impl/conversion.py | 1 + 6 files changed, 102 insertions(+), 31 deletions(-) diff --git a/tensorflow/python/autograph/__init__.py b/tensorflow/python/autograph/__init__.py index c3448e6e58..5ed5e85158 100644 --- a/tensorflow/python/autograph/__init__.py +++ b/tensorflow/python/autograph/__init__.py @@ -27,6 +27,7 @@ from tensorflow.python.autograph import utils from tensorflow.python.autograph.core.errors import GraphConstructionError from tensorflow.python.autograph.core.errors import TfRuntimeError from tensorflow.python.autograph.core.errors import improved_errors +from tensorflow.python.autograph.impl.api import ConversionOptions from tensorflow.python.autograph.impl.api import RunMode from tensorflow.python.autograph.impl.api import convert from tensorflow.python.autograph.impl.api import converted_call @@ -42,6 +43,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ # Main API + 'ConversionOptions', 'RunMode', 'convert', 'converted_call', diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py index 6a606c450d..fc2075b781 100644 --- a/tensorflow/python/autograph/converters/call_trees.py +++ b/tensorflow/python/autograph/converters/call_trees.py @@ -238,9 +238,16 @@ class CallTreeTransformer(converter.Base): # Before we could convert all the time though, we'd need a reasonable # caching mechanism. 
template = """ - ag__.converted_call(func, True, False, False, {}, args) + ag__.converted_call( + func, + ag__.ConversionOptions.new(recursive=recursive_val), + args) """ - call_expr = templates.replace(template, func=node.func, args=node.args) + call_expr = templates.replace( + template, + func=node.func, + recursive_val=parser.parse_expression(str(self.ctx.program.recursive)), + args=node.args) new_call = call_expr[0].value # TODO(mdan): Improve the template mechanism to better support this. new_call.keywords = node.keywords diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py index 0a0c6f9002..7ce1b7c4c5 100644 --- a/tensorflow/python/autograph/core/converter_testing.py +++ b/tensorflow/python/autograph/core/converter_testing.py @@ -93,11 +93,21 @@ class TestCase(test.TestCase): self.dynamic_calls.append(args) return 7 + class ConversionOptions(object): + """Mock version of api.ConversionOptions.""" + + def __init__(self, recursive): + self.recursive = recursive + + @classmethod + def new(cls, recursive): + cls(recursive) + try: result, source = compiler.ast_to_object(node, include_source_map=True) result.tf = self.make_fake_mod('fake_tf', *symbols) - fake_ag = self.make_fake_mod('fake_ag', converted_call) + fake_ag = self.make_fake_mod('fake_ag', converted_call, ConversionOptions) fake_ag.__dict__.update(operators.__dict__) fake_ag.__dict__['utils'] = utils fake_ag.__dict__['rewrite_graph_construction_error'] = ( diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py index 669d36bd28..ee2467e0dc 100644 --- a/tensorflow/python/autograph/impl/api.py +++ b/tensorflow/python/autograph/impl/api.py @@ -18,7 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from functools import wraps +import collections +import functools from enum import Enum @@ -38,6 +39,41 @@ from tensorflow.python.util 
import tf_inspect # (currently we require (module + class name, type)) +class ConversionOptions( + collections.namedtuple('ConversionOptions', + ('recursive', 'verbose', 'strip_decorators', + 'force_conversion', 'arg_types'))): + """Container for conversion flags. + + Attributes: + recursive: bool, whether to recursively convert any user functions or + classes that the converted function may use. + verbose: bool, whether to log the compiled code. + strip_decorators: Tuple[Callable], contains decorators that should be in + excluded from the compiled output. By default, when converting a + function before the decorators are applied, the compiled output will + include those decorators. + force_conversion: bool, whether to force convertinng the target entity. + When force_conversion is turned off, the converter may decide to + return the function as-is. + arg_types: Optional[Dict[Text, Type]], type hints for symbols including + function arguments. + """ + + @classmethod + def new(cls, + recursive=False, + verbose=False, + strip_decorators=None, + force_conversion=False, + arg_types=None): + return cls(recursive=recursive, + verbose=verbose, + strip_decorators=strip_decorators or (), + force_conversion=force_conversion, + arg_types=arg_types or {}) + + # TODO(mdan): This should behave like to_graph (e.g. convert statically). def convert(recursive=False, verbose=False): """Decorator that compiles a function to use TensorFlow ops. 
@@ -59,9 +95,15 @@ def convert(recursive=False, verbose=False): def decorator(f): """Decorator implementation.""" - @wraps(f) + @functools.wraps(f) def wrapper(*args, **kwargs): - return converted_call(f, recursive, verbose, True, {}, *args, **kwargs) + return converted_call( + f, + ConversionOptions.new( + recursive=recursive, + verbose=verbose, + force_conversion=True, + ), *args, **kwargs) wrapper = tf_decorator.make_decorator(f, wrapper) @@ -107,11 +149,11 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None): def decorator(f): """Decorator implementation.""" - @wraps(f) + @functools.wraps(f) def graph_wrapper(*args, **kwargs): return f(*args, **kwargs) - @wraps(f) + @functools.wraps(f) def py_func_wrapper(*args, **kwargs): if kwargs: raise NotImplementedError('RunMode.PY_FUNC does not yet support kwargs') @@ -135,12 +177,11 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None): # TODO(mdan): Move to a private, undocumented module. -def converted_call(f, recursive, verbose, force_conversion, arg_types, *args, - **kwargs): +def converted_call(f, options, *args, **kwargs): """Compiles a function call inline. For internal use only.""" # TODO(mdan): This needs cleanup. # In particular, we may want to avoid renaming functions altogether. - if not force_conversion and conversion.is_whitelisted_for_graph(f): + if not options.force_conversion and conversion.is_whitelisted_for_graph(f): return f(*args, **kwargs) unknown_arg_value = object() # Sentinel for arguments of unknown value @@ -183,8 +224,8 @@ def converted_call(f, recursive, verbose, force_conversion, arg_types, *args, continue arg_class = arg.__class__ # If arg_value_hints specifies any name, use that instead. 
- if name not in arg_types: - arg_types[name] = (arg_class.__name__, arg_class) + if name not in options.arg_types: + options.arg_types[name] = (arg_class.__name__, arg_class) # When called from within a decorator, this is the only indication that # the function is a method - it appears that the decorator is applied @@ -199,23 +240,25 @@ def converted_call(f, recursive, verbose, force_conversion, arg_types, *args, converted_f = to_graph( target_entity, - recursive=recursive, - verbose=verbose, + recursive=options.recursive, + verbose=options.verbose, arg_values=arg_values, - arg_types=arg_types, - partial_types=partial_types) + arg_types=options.arg_types, + partial_types=partial_types, + strip_decorators=options.strip_decorators) return converted_f(*effective_args, **kwargs) # TODO(mdan): Rename: to_ops? -# TODO(mdan): Looki into overloading as function and decorator, like tfe.defun. +# TODO(mdan): Look into overloading as function and decorator, like tfe.defun? # TODO(mdan): Remove partial_types. def to_graph(e, recursive=True, verbose=False, arg_values=None, arg_types=None, - partial_types=None): + partial_types=None, + strip_decorators=None): """Converts a Python entity into equivalent code that uses TensorFlow ops. Supported Python entities include: @@ -234,6 +277,8 @@ def to_graph(e, arg_types: Optional[Dict[Text, Type]], type hints for symbols including function arguments. partial_types: Set[Type], reserved for internal use. + strip_decorators: Tuple[Callable], same as + ConversionOptions.strip_decorators. Returns: Union[Callable, Type], the converted entity, which is the same kind as e @@ -243,9 +288,13 @@ def to_graph(e, Raises: ValueError: If the entity could not be converted. 
""" + if strip_decorators is None: + strip_decorators = () + strip_decorators += (convert, do_not_convert, converted_call) + program_ctx = converter.ProgramContext( recursive=recursive, - autograph_decorators=(convert, do_not_convert, converted_call), + autograph_decorators=strip_decorators, partial_types=partial_types, autograph_module=tf_inspect.getmodule(to_graph), uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES) diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py index 54e12f0223..e0770ef4c6 100644 --- a/tensorflow/python/autograph/impl/api_test.py +++ b/tensorflow/python/autograph/impl/api_test.py @@ -32,7 +32,6 @@ from tensorflow.python.util import tf_inspect tf = utils.fake_tf() - class ApiTest(test.TestCase): def setUp(self): @@ -180,8 +179,9 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): while tf.reduce_sum(x) > s: - x //= api.converted_call(self.called_member, False, False, False, {}, - self, a) + x //= api.converted_call( + self.called_member, + api.ConversionOptions.new(), self, a) return x tc = TestClass() @@ -192,7 +192,7 @@ class ApiTest(test.TestCase): self.assertListEqual([0, 1], sess.run(x).tolist()) def test_converted_call_builtin(self): - x = api.converted_call(range, False, False, False, {}, 3) + x = api.converted_call(range, api.ConversionOptions.new(), 3) self.assertEqual((0, 1, 2), tuple(x)) def test_converted_call_function(self): @@ -203,7 +203,7 @@ class ApiTest(test.TestCase): return x with self.test_session() as sess: - x = api.converted_call(test_fn, False, False, False, {}, + x = api.converted_call(test_fn, api.ConversionOptions.new(), constant_op.constant(-1)) self.assertEqual(1, sess.run(x)) @@ -221,7 +221,7 @@ class ApiTest(test.TestCase): with self.test_session() as sess: tc = TestClass(constant_op.constant(-1)) - x = api.converted_call(tc.test_method, False, False, False, {}, tc) + x = api.converted_call(tc.test_method, 
api.ConversionOptions.new(), tc) self.assertEqual(1, sess.run(x)) def test_converted_call_method_by_class(self): @@ -238,7 +238,9 @@ class ApiTest(test.TestCase): with self.test_session() as sess: tc = TestClass(constant_op.constant(-1)) - x = api.converted_call(TestClass.test_method, False, False, False, {}, tc) + x = api.converted_call( + TestClass.test_method, + api.ConversionOptions.new(), tc) self.assertEqual(1, sess.run(x)) def test_converted_call_callable_object(self): @@ -255,7 +257,7 @@ class ApiTest(test.TestCase): with self.test_session() as sess: tc = TestClass(constant_op.constant(-1)) - x = api.converted_call(tc, False, False, False, {}) + x = api.converted_call(tc, api.ConversionOptions.new()) self.assertEqual(1, sess.run(x)) def test_converted_call_constructor(self): @@ -271,7 +273,7 @@ class ApiTest(test.TestCase): return self.x with self.test_session() as sess: - tc = api.converted_call(TestClass, False, False, False, {}, + tc = api.converted_call(TestClass, api.ConversionOptions.new(), constant_op.constant(-1)) # tc is now a converted object. x = tc.test_method() @@ -283,12 +285,12 @@ class ApiTest(test.TestCase): return x == 0 with self.test_session() as sess: - x = api.converted_call(f, False, False, False, {}, + x = api.converted_call(f, api.ConversionOptions.new(), constant_op.constant(0)) self.assertTrue(sess.run(x)) converted_f = api.to_graph(f) - x = api.converted_call(converted_f, False, False, False, {}, + x = api.converted_call(converted_f, api.ConversionOptions.new(), constant_op.constant(0)) self.assertTrue(sess.run(x)) diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py index 928ff9e7ea..a0d13c82a8 100644 --- a/tensorflow/python/autograph/impl/conversion.py +++ b/tensorflow/python/autograph/impl/conversion.py @@ -255,6 +255,7 @@ def _add_self_references(namespace, autograph_module): # internal modules. 
ag_internal = imp.new_module('autograph') ag_internal.converted_call = autograph_module.converted_call + ag_internal.ConversionOptions = autograph_module.ConversionOptions ag_internal.utils = utils ag_internal.rewrite_graph_construction_error = ( errors.rewrite_graph_construction_error) -- GitLab From 25c99131362f034c3bc3805d741f0c4ab9d0cb8b Mon Sep 17 00:00:00 2001 From: "David G. Andersen" Date: Tue, 18 Sep 2018 06:51:20 -0700 Subject: [PATCH 0323/1357] Add a fuzzer to test DecodeCompressed PiperOrigin-RevId: 213441868 --- tensorflow/core/kernels/fuzzing/BUILD | 2 + .../kernels/fuzzing/decode_compressed_fuzz.cc | 45 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD index 8bfa40304e..f2e0b2558f 100644 --- a/tensorflow/core/kernels/fuzzing/BUILD +++ b/tensorflow/core/kernels/fuzzing/BUILD @@ -43,4 +43,6 @@ tf_ops_fuzz_target_lib("example_proto_fast_parsing") tf_ops_fuzz_target_lib("parse_tensor_op") +tf_ops_fuzz_target_lib("decode_compressed") + tf_ops_fuzz_target_lib("decode_json_example") diff --git a/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc b/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc new file mode 100644 index 0000000000..0a56f4b63f --- /dev/null +++ b/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc @@ -0,0 +1,45 @@ +/* Copyright 2018 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/kernels/fuzzing/fuzz_session.h" + +namespace tensorflow { +namespace fuzzing { + +class FuzzDecodeCompressed : public FuzzStringInputOp { + void BuildGraph(const Scope& scope) override { + auto input = + tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING); + auto d1 = tensorflow::ops::DecodeCompressed( + scope.WithOpName("d1"), input, + tensorflow::ops::DecodeCompressed::CompressionType("")); + auto d2 = tensorflow::ops::DecodeCompressed( + scope.WithOpName("d2"), input, + tensorflow::ops::DecodeCompressed::CompressionType("ZLIB")); + auto d3 = tensorflow::ops::DecodeCompressed( + scope.WithOpName("d3"), input, + tensorflow::ops::DecodeCompressed::CompressionType("GZIP")); + Scope grouper = + scope.WithControlDependencies(std::vector{ + d1.output.op(), d2.output.op(), d3.output.op()}); + (void)tensorflow::ops::NoOp(grouper.WithOpName("output")); + } +}; + +STANDARD_TF_FUZZ_FUNCTION(FuzzDecodeCompressed); + +} // namespace fuzzing +} // namespace tensorflow -- GitLab From 97011c17de3f21ae7d40f89f09bf7513dc0e49aa Mon Sep 17 00:00:00 2001 From: Geoffrey Irving Date: Fri, 7 Sep 2018 09:01:56 -0700 Subject: [PATCH 0324/1357] Make tf.random_uniform([0], maxval=0, dtype=tf.int32) not crash For integers, tf.random_uniform enforces a nonempty range with minval < maxval. However, an empty range is fine if we're producing no output values, and this degenerate case occurs naturally for some code patterns. Thus, tf.random_uniform now allows empty ranges for integer random numbers if the output shape is empty. 
--- tensorflow/core/kernels/random_op.cc | 10 +++++++--- .../python/kernel_tests/random/random_ops_test.py | 9 +++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index e37232539f..04a53697c0 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -231,7 +231,13 @@ class RandomUniformIntOp : public OpKernel { errors::InvalidArgument("maxval must be 0-D, got shape ", maxval.shape().DebugString())); - // Verify that minval < maxval + // Allocate output, and exit early if possible + Tensor* output; + OP_REQUIRES_OK(ctx, AllocateOutputWithShape(ctx, shape, 0, &output)); + if (output->NumElements() == 0) return; + + // Verify that minval < maxval. This check intentionally happens after the + // early exit for empty output. Zero impossible things are fine. IntType lo = minval.scalar()(); IntType hi = maxval.scalar()(); OP_REQUIRES( @@ -243,8 +249,6 @@ class RandomUniformIntOp : public OpKernel { Distribution; Distribution dist(lo, hi); - Tensor* output; - OP_REQUIRES_OK(ctx, AllocateOutputWithShape(ctx, shape, 0, &output)); auto output_flat = output->flat(); functor::FillPhiloxRandom()( ctx, ctx->eigen_device(), diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py index 0ef6a95cfc..d199a9d9dd 100644 --- a/tensorflow/python/kernel_tests/random/random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/random_ops_test.py @@ -320,6 +320,15 @@ class RandomUniformTest(RandomOpTestCommon): error = np.abs(counts - mean) self.assertLess(error.max(), 5 * std) + # Check that minval = maxval is fine iff we're producing no numbers + def testUniformIntsDegenerate(self): + for dt in dtypes.int32, dtypes.int64: + def sample(n): + return self._Sampler(n, minv=0, maxv=0, dtype=dt, use_gpu=True)() + self.assertEqual(sample(0).shape, (10, 0)) + with 
self.assertRaisesOpError('Need minval < maxval, got 0 >= 0'): + sample(1) + # Checks that the CPU and GPU implementation returns the same results, # given the same random seed def testCPUGPUMatch(self): -- GitLab From 18b47f08b13c628ef87d9a99f7fde743baca5300 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Tue, 18 Sep 2018 08:30:46 -0700 Subject: [PATCH 0325/1357] Automated rollback of commit 19d66a950e2091bb598c6a2d375e14208f5773b2 PiperOrigin-RevId: 213453719 --- tensorflow/contrib/tensorrt/BUILD | 21 +------------------ tensorflow/contrib/tensorrt/test/base_test.py | 6 ++++++ .../test/tf_trt_integration_test_base.py | 5 +++-- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 4ea7216ef2..9e8979bce4 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -444,6 +444,7 @@ cuda_py_test( cuda_py_tests( name = "tf_trt_integration_test", srcs = [ + "test/base_test.py", "test/batch_matmul_test.py", "test/biasadd_matmul_test.py", "test/binary_tensor_weight_broadcast_test.py", @@ -470,26 +471,6 @@ cuda_py_tests( ], ) -cuda_py_tests( - name = "base_test", - srcs = [ - "test/base_test.py", - ], - additional_deps = [ - ":tf_trt_integration_test_base", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_test_lib", - ], - tags = [ - "manual", - "no_cuda_on_cpu_tap", - "no_gpu", - "no_windows", - "nomac", - "notap", - ], -) - cc_library( name = "utils", srcs = ["convert/utils.cc"], diff --git a/tensorflow/contrib/tensorrt/test/base_test.py b/tensorflow/contrib/tensorrt/test/base_test.py index e9ac833d55..7e9ffb05ab 100644 --- a/tensorflow/contrib/tensorrt/test/base_test.py +++ b/tensorflow/contrib/tensorrt/test/base_test.py @@ -183,6 +183,12 @@ class PartiallyConvertedTestA(trt_test.TfTrtIntegrationTestBase): "my_trt_op_0": ["c0", "c1", "add0", "add1", "mul0", "mul1"] } + def ShouldRunTest(self, run_params): + """Whether to run the 
test.""" + # Disable the test in fp16 mode since multiple matmul and add ops together + # can cause overflow. + return run_params.precision_mode != "FP16" + class PartiallyConvertedTestB(PartiallyConvertedTestA): diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py index fc647e4eb9..699f79adec 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py @@ -179,11 +179,11 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): def ExpectedAbsoluteTolerance(self, run_params): """The absolute tolerance to compare floating point results.""" - return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-03 + return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-02 def ExpectedRelativeTolerance(self, run_params): """The relative tolerance to compare floating point results.""" - return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-03 + return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-02 def _GetParamsCached(self): if self._trt_test_params is None: @@ -414,6 +414,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): if not self.ShouldRunTest(run_params): return assert run_params.precision_mode in PRECISION_MODES + np.random.seed(12345) params = self._GetParamsCached() input_gdef = params.gdef -- GitLab From b1ff7c2cedcc7d49d430d56655870e6d68a0c8f7 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 18 Sep 2018 08:47:31 -0700 Subject: [PATCH 0326/1357] Creating an InstantiatedCapturedFunction that captures the instantiated state of a function to be executed, separating it out from the non instantiated regular state such as function name, captured inputs etc. This allows us to truly separate Dataset kernel creation from Iterator creation i.e. 
each time a dataset is created that uses functions, we create only a CapturedFunction whereas we create an InstantiatedCapturedFunction each time a new iterator is created. PiperOrigin-RevId: 213456128 --- .../core/kernels/data/captured_function.cc | 206 ++++++++---------- .../core/kernels/data/captured_function.h | 129 ++++++----- tensorflow/core/kernels/data/dataset_utils.cc | 9 +- tensorflow/core/kernels/data/dataset_utils.h | 5 +- .../core/kernels/data/filter_dataset_op.cc | 33 +-- .../core/kernels/data/flat_map_dataset_op.cc | 7 +- .../core/kernels/data/generator_dataset_op.cc | 23 +- .../data/group_by_reducer_dataset_op.cc | 31 +-- .../data/group_by_window_dataset_op.cc | 25 ++- .../kernels/data/interleave_dataset_op.cc | 8 +- .../kernels/data/map_and_batch_dataset_op.cc | 6 +- .../core/kernels/data/map_dataset_op.cc | 6 +- .../data/parallel_interleave_dataset_op.cc | 16 +- .../kernels/data/parallel_map_dataset_op.cc | 57 +++-- .../kernels/data/parallel_map_iterator.cc | 37 +--- .../core/kernels/data/parallel_map_iterator.h | 44 ++-- .../kernels/data/parse_example_dataset_op.cc | 185 +++++++++------- .../core/kernels/data/scan_dataset_op.cc | 8 +- 18 files changed, 440 insertions(+), 395 deletions(-) diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index b3ab7e2bc6..96ae8e16d5 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -46,10 +46,36 @@ Status CapturedFunction::Create( return Status::OK(); } -CapturedFunction::~CapturedFunction() { - if (lib_ != nullptr && f_handle_ != kInvalidHandle) { - lib_->ReleaseHandle(f_handle_).IgnoreError(); +Status CapturedFunction::Instantiate( + IteratorContext* ctx, std::unique_ptr* + instantiated_captured_function) { + // The context's runtime will be used for all subsequent calls. 
+ FunctionLibraryRuntime* lib = ctx->lib(); + FunctionLibraryRuntime::InstantiateOptions inst_opts; + inst_opts.overlay_lib = ctx->function_library().get(); + inst_opts.state_handle = std::to_string(random::New64()); + inst_opts.create_kernels_eagerly = true; + if (!use_inter_op_parallelism_) { + inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR"; + } + + FunctionLibraryRuntime::Handle f_handle; + Status s = (lib->Instantiate(func_.name(), AttrSlice(&func_.attr()), + inst_opts, &f_handle)); + TF_RETURN_IF_ERROR(s); + const FunctionBody* fbody = lib->GetFunctionBody(f_handle); + if (fbody == nullptr) { + return errors::Internal("Failed to instantiate function body."); } + + DataTypeVector ret_types; + for (const auto& ret_type : fbody->ret_types) { + ret_types.push_back(ret_type); + } + + instantiated_captured_function->reset(new InstantiatedCapturedFunction( + lib, f_handle, std::move(ret_types), *ctx->runner(), this)); + return Status::OK(); } namespace { @@ -172,35 +198,34 @@ class BorrowedArgsCallFrame : public CallFrameBase { } // namespace -Status CapturedFunction::GetHandle(IteratorContext* ctx, - FunctionLibraryRuntime::Handle* out_handle) { - tf_shared_lock l(mu_); - if (lib_ == nullptr) { - return errors::Internal("Captured function \"", func_.name(), - "\" was called before it was instantiated."); - } - if (ctx->lib() != lib_) { - return errors::Internal("Captured function \"", func_.name(), - "\" was called with a different " - "FunctionLibraryRuntime*, which is not permitted."); +InstantiatedCapturedFunction::InstantiatedCapturedFunction( + FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle, + DataTypeVector ret_types, std::function)> runner, + CapturedFunction* captured_func) + : lib_(lib), + f_handle_(f_handle), + ret_types_(std::move(ret_types)), + captured_runner_(std::move(runner)), + captured_func_(captured_func) {} + +InstantiatedCapturedFunction::~InstantiatedCapturedFunction() { + if (lib_ != nullptr && f_handle_ != 
kInvalidHandle) { + lib_->ReleaseHandle(f_handle_).IgnoreError(); } - *out_handle = f_handle_; - return Status::OK(); } -Status CapturedFunction::Run(IteratorContext* ctx, std::vector&& args, - std::vector* rets) { - FunctionLibraryRuntime::Handle handle; - TF_RETURN_IF_ERROR(GetHandle(ctx, &handle)); - +Status InstantiatedCapturedFunction::Run(IteratorContext* ctx, + std::vector&& args, + std::vector* rets) const { FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = CapturedFunction::generate_step_id(); - ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) { - ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError(); - }); + f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); + ScopedStepContainer step_container( + f_opts.step_id, [this](const string& name) { + lib_->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); f_opts.step_container = &step_container; f_opts.runner = ctx->runner(); - if (ctx->lib()->device()->device_type() != DEVICE_CPU) { + if (lib_->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -212,10 +237,11 @@ Status CapturedFunction::Run(IteratorContext* ctx, std::vector&& args, CancellationManager c_mgr; f_opts.cancellation_manager = &c_mgr; - OwnedArgsCallFrame frame(std::move(args), &captured_inputs_, ret_types_); + OwnedArgsCallFrame frame(std::move(args), &captured_func_->captured_inputs(), + ret_types_); Notification n; Status s; - ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) { + lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) { s.Update(func_status); n.Notify(); }); @@ -224,20 +250,18 @@ Status CapturedFunction::Run(IteratorContext* ctx, std::vector&& args, return frame.ConsumeRetvals(rets); } -Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx, - const std::vector& args, - std::vector* rets) { - 
FunctionLibraryRuntime::Handle handle; - TF_RETURN_IF_ERROR(GetHandle(ctx, &handle)); - +Status InstantiatedCapturedFunction::RunWithBorrowedArgs( + IteratorContext* ctx, const std::vector& args, + std::vector* rets) const { FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = CapturedFunction::generate_step_id(); - ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) { - ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError(); - }); + f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); + ScopedStepContainer step_container( + f_opts.step_id, [this](const string& name) { + lib_->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); f_opts.step_container = &step_container; f_opts.runner = ctx->runner(); - if (ctx->lib()->device()->device_type() != DEVICE_CPU) { + if (lib_->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -249,11 +273,12 @@ Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx, CancellationManager c_mgr; f_opts.cancellation_manager = &c_mgr; - BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_); + BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(), + ret_types_); Notification n; Status s; - ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) { + lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) { s.Update(func_status); n.Notify(); }); @@ -262,65 +287,17 @@ Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx, return frame.ConsumeRetvals(rets); } -Status CapturedFunction::Instantiate(IteratorContext* ctx) { - mutex_lock l(mu_); - if (lib_ == nullptr) { - // The context's runtime will be used for all subsequent calls. 
- lib_ = ctx->lib(); - DCHECK(f_handle_ == kInvalidHandle); - FunctionLibraryRuntime::InstantiateOptions inst_opts; - inst_opts.overlay_lib = ctx->function_library().get(); - inst_opts.state_handle = std::to_string(random::New64()); - inst_opts.create_kernels_eagerly = true; - if (!use_inter_op_parallelism_) { - inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR"; - } - Status s = (lib_->Instantiate(func_.name(), AttrSlice(&func_.attr()), - inst_opts, &f_handle_)); - TF_RETURN_IF_ERROR(s); - const FunctionBody* fbody = lib_->GetFunctionBody(f_handle_); - if (fbody == nullptr) { - return errors::Internal("Failed to instantiate function body."); - } - ret_types_ = fbody->ret_types; - } else { - if (ctx->lib() != lib_) { - return errors::Internal( - "Captured function was called with a different " - "FunctionLibraryRuntime*, which is not permitted."); - } - } - if (captured_runner_ == nullptr) { - captured_runner_ = *ctx->runner(); - } - return Status::OK(); -} - -Status CapturedFunction::RunInstantiated(const std::vector& args, - std::vector* rets) { - FunctionLibraryRuntime* lib; - FunctionLibraryRuntime::Handle handle; - std::function)>* runner; - { - tf_shared_lock l(mu_); - if (lib_ == nullptr) { - return errors::FailedPrecondition( - "`CapturedFunction::Instantiate()` must be called before a call to " - "`CapturedFunction::RunInstantiated()`."); - } - lib = lib_; - handle = f_handle_; - runner = &captured_runner_; - } - +Status InstantiatedCapturedFunction::RunInstantiated( + const std::vector& args, std::vector* rets) { FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = CapturedFunction::generate_step_id(); - ScopedStepContainer step_container(f_opts.step_id, [lib](const string& name) { - lib->device()->resource_manager()->Cleanup(name).IgnoreError(); - }); + f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); + ScopedStepContainer step_container( + f_opts.step_id, [this](const string& name) { + 
lib_->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); f_opts.step_container = &step_container; - f_opts.runner = runner; - if (lib->device()->device_type() != DEVICE_CPU) { + f_opts.runner = &captured_runner_; + if (lib_->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -332,11 +309,12 @@ Status CapturedFunction::RunInstantiated(const std::vector& args, CancellationManager c_mgr; f_opts.cancellation_manager = &c_mgr; - BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_); + BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(), + ret_types_); Notification n; Status s; - lib->Run(f_opts, handle, &frame, [&n, &s](Status func_status) { + lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) { s.Update(func_status); n.Notify(); }); @@ -345,33 +323,25 @@ Status CapturedFunction::RunInstantiated(const std::vector& args, return frame.ConsumeRetvals(rets); } -void CapturedFunction::RunAsync(IteratorContext* ctx, - std::vector&& args, - std::vector* rets, - FunctionLibraryRuntime::DoneCallback done, - const string& prefix) { +void InstantiatedCapturedFunction::RunAsync( + IteratorContext* ctx, std::vector&& args, std::vector* rets, + FunctionLibraryRuntime::DoneCallback done, const string& prefix) const { // NOTE(mrry): This method does not transfer ownership of `ctx`, and it may // be deleted before `done` is called. Take care not to capture `ctx` in any // code that may execute asynchronously in this function. 
- FunctionLibraryRuntime::Handle handle; - Status s = GetHandle(ctx, &handle); - if (!s.ok()) { - done(s); - return; - } - auto frame = - new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_); + auto frame = new OwnedArgsCallFrame( + std::move(args), &captured_func_->captured_inputs(), ret_types_); FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = CapturedFunction::generate_step_id(); - ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager(); + f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); + ResourceMgr* resource_mgr = lib_->device()->resource_manager(); auto step_container = new ScopedStepContainer( f_opts.step_id, [resource_mgr](const string& name) { resource_mgr->Cleanup(name).IgnoreError(); }); f_opts.step_container = step_container; f_opts.runner = ctx->runner(); - if (ctx->lib()->device()->device_type() != DEVICE_CPU) { + if (lib_->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -426,15 +396,13 @@ void CapturedFunction::RunAsync(IteratorContext* ctx, }, std::move(done), std::placeholders::_1); - ctx->lib()->Run(f_opts, handle, frame, std::move(callback)); + lib_->Run(f_opts, f_handle_, frame, std::move(callback)); } CapturedFunction::CapturedFunction(const NameAttrList& func, std::vector captured_inputs, bool use_inter_op_parallelism) : func_(func), - lib_(nullptr), - f_handle_(kInvalidHandle), captured_inputs_(std::move(captured_inputs)), use_inter_op_parallelism_(use_inter_op_parallelism) {} diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h index a10376bf97..1b10725082 100644 --- a/tensorflow/core/kernels/data/captured_function.h +++ b/tensorflow/core/kernels/data/captured_function.h @@ -34,59 +34,41 @@ class ResourceMgr; namespace data { -// A `CapturedFunction` encapsulates a TensorFlow function and all of -// the runtime support required 
to execute it. +class CapturedFunction; + +// An InstantiatedCapturedFunction encapsulates all the runtime support needed +// to execute a tensorflow function. // -// The `Dataset`-related classes use `CapturedFunction` to execute -// TensorFlow functions outside a the normal `OpKernel::Compute()` -// context. -class CapturedFunction { +// While CapturedFunction (below) encapsulates the more permanent attributes +// of the function i.e. name, captured arguments etc., +// InstantiatedCapturedFunction encapsulates the more runtime aspects i.e. +// FunctionLibraryRuntime, function handle etc. +// +// The `Iterator-`related classes use `InstantiatedCapturedFunction` to execute +// functions. +class InstantiatedCapturedFunction { public: - // Creates a new instance using a list of named attributes, fetching captured - // inputs from a context argument. - static Status Create(const NameAttrList& func, OpKernelContext* ctx, - const string& argument, - std::unique_ptr* out_function); + ~InstantiatedCapturedFunction(); - // Creates a new instance using a list of named attributes, fetching captured - // inputs from a context argument. - // - // If `use_inter_op_parallelism` is false, the runtime may use an executor - // that is optimized for small functions. - static Status Create(const NameAttrList& func, OpKernelContext* ctx, - const string& argument, bool use_inter_op_parallelism, - std::unique_ptr* out_function); - - ~CapturedFunction(); - - // Runs the "Captured function" using the given FLR and caches the lib and - // handle generated during instantiation. If Run is called with a different - // lib afterwards, generates an error. This method takes ownership of the - // tensors in `args`, in order to be able to deallocate them as early as + // Runs the "Instantiated Captured function". This method takes ownership of + // the tensors in `args`, in order to be able to deallocate them as early as // possible. 
Use `RunWithBorrowedArgs()` if the caller needs to retain // ownership of the `args`. Status Run(IteratorContext* ctx, std::vector&& args, - std::vector* rets); + std::vector* rets) const; // Synchronously runs the captured function on the given `args`, and stores // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when // possible. Status RunWithBorrowedArgs(IteratorContext* ctx, const std::vector& args, - std::vector* rets); - - // Explicitly instantiate this function for use in the given - // context. This method, and the context-less overload - // `RunInstantiated()` below can be useful for calling a captured - // function in cases where an `IteratorContext*` is not available - // (such as a destructor). - Status Instantiate(IteratorContext* ctx); + std::vector* rets) const; // Synchronously runs the captured function on the given `args`, and stores // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when - // possible. - // - // REQUIRES: `this->Instantiate()` must have been called before this method. + // possible. This can be useful for calling a captured + // function in cases where an `IteratorContext*` is not available + // (such as a destructor). Status RunInstantiated(const std::vector& args, std::vector* rets); @@ -97,16 +79,9 @@ class CapturedFunction { void RunAsync(IteratorContext* ctx, std::vector&& args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done, - const string& prefix); - - // Returns the named list of function arguments. - const NameAttrList& func() { return func_; } + const string& prefix) const; - // Returns that additional captured inputs that will be passed to the function - // when `Run*()` is called. - const std::vector& captured_inputs() { return captured_inputs_; } - - // Returns a step ID for use when running a `CapturedFunction`. + // Returns a step ID for use when running an `InstantiatedCapturedFunction`. 
static int64 generate_step_id() { // Choose a step ID that is guaranteed not to clash with any // Session-generated step ID. DirectSession only generates @@ -116,21 +91,65 @@ class CapturedFunction { return -std::abs(static_cast(random::New64())); } + private: + InstantiatedCapturedFunction( + FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle, + DataTypeVector ret_types, + std::function)> runner, + CapturedFunction* captured_func); + + friend class CapturedFunction; + + FunctionLibraryRuntime* const lib_; + const FunctionLibraryRuntime::Handle f_handle_; + const DataTypeVector ret_types_; + std::function)> captured_runner_; + CapturedFunction* const captured_func_; + + TF_DISALLOW_COPY_AND_ASSIGN(InstantiatedCapturedFunction); +}; + +// A `CapturedFunction` encapsulates a TensorFlow function. +// +// The `Dataset`-related classes use `CapturedFunction` to execute +// TensorFlow functions outside a the normal `OpKernel::Compute()` +// context. +class CapturedFunction { + public: + // Creates a new instance using a list of named attributes, fetching captured + // inputs from a context argument. + static Status Create(const NameAttrList& func, OpKernelContext* ctx, + const string& argument, + std::unique_ptr* out_function); + + // Creates a new instance using a list of named attributes, fetching captured + // inputs from a context argument. + // + // If `use_inter_op_parallelism` is false, the runtime may use an executor + // that is optimized for small functions. + static Status Create(const NameAttrList& func, OpKernelContext* ctx, + const string& argument, bool use_inter_op_parallelism, + std::unique_ptr* out_function); + + // Instantiates this function for use in the given context, providing an + // InstantiatedCapturedFunction that can be used to execute functions. + Status Instantiate(IteratorContext* ctx, + std::unique_ptr* + instantiated_captured_function); + + // Returns the named list of function arguments. 
+ const NameAttrList& func() { return func_; } + + // Returns that additional captured inputs that will be passed to the function + const std::vector& captured_inputs() { return captured_inputs_; } + private: CapturedFunction(const NameAttrList& func, std::vector captured_inputs, bool use_inter_op_parallelism); - Status GetHandle(IteratorContext* ctx, - FunctionLibraryRuntime::Handle* out_handle); - - mutex mu_; const NameAttrList func_; - FunctionLibraryRuntime* lib_ GUARDED_BY(mu_); - FunctionLibraryRuntime::Handle f_handle_ GUARDED_BY(mu_); const std::vector captured_inputs_; - DataTypeSlice ret_types_; - std::function)> captured_runner_ = nullptr; const bool use_inter_op_parallelism_; TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction); diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index e7ac368ae3..36a1837295 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -21,12 +21,13 @@ namespace data { Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, - int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, - std::unique_ptr* out_iterator) { + int64 thread_index, + const InstantiatedCapturedFunction& instantiated_captured_func, + StringPiece prefix, std::unique_ptr* out_iterator) { std::vector return_values; - TF_RETURN_IF_ERROR( - captured_func->RunWithBorrowedArgs(ctx, input_element, &return_values)); + TF_RETURN_IF_ERROR(instantiated_captured_func.RunWithBorrowedArgs( + ctx, input_element, &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT && TensorShapeUtils::IsScalar(return_values[0].shape()))) { diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 234856ea39..3de157b4bc 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -24,8 +24,9 
@@ namespace data { Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, - int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, - std::unique_ptr* out_iterator); + int64 thread_index, + const InstantiatedCapturedFunction& instantiated_captured_func, + StringPiece prefix, std::unique_ptr* out_iterator); } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc index 19c35f94a6..dfdc16f347 100644 --- a/tensorflow/core/kernels/data/filter_dataset_op.cc +++ b/tensorflow/core/kernels/data/filter_dataset_op.cc @@ -131,9 +131,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - virtual Status EvaluatePredicate(IteratorContext* ctx, - const std::vector& element, - bool* out_matched) const = 0; + virtual Status EvaluatePredicate( + IteratorContext* ctx, + InstantiatedCapturedFunction* instantiated_captured_function, + const std::vector& element, bool* out_matched) const = 0; private: class Iterator : public DatasetIterator { @@ -144,7 +145,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } Status GetNextInternal(IteratorContext* ctx, @@ -171,8 +173,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - TF_RETURN_IF_ERROR( - dataset()->EvaluatePredicate(ctx, *out_tensors, &matched)); + TF_RETURN_IF_ERROR(dataset()->EvaluatePredicate( + ctx, instantiated_captured_func_.get(), *out_tensors, &matched)); if (!matched) { // Clear the output tensor list since it didn't match. 
out_tensors->clear(); @@ -206,6 +208,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { private: mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; @@ -220,14 +223,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { using FilterDatasetBase::FilterDatasetBase; protected: - Status EvaluatePredicate(IteratorContext* ctx, - const std::vector& element, - bool* out_matched) const override { + Status EvaluatePredicate( + IteratorContext* ctx, + InstantiatedCapturedFunction* instantiated_captured_function, + const std::vector& element, bool* out_matched) const override { // TODO(mrry): Avoid blocking a threadpool thread. We will need to // stack-rip the iterators and use async kernels. std::vector result; - TF_RETURN_IF_ERROR( - captured_func_->RunWithBorrowedArgs(ctx, element, &result)); + TF_RETURN_IF_ERROR(instantiated_captured_function->RunWithBorrowedArgs( + ctx, element, &result)); if (result.size() != 1 || result[0].dtype() != DT_BOOL || result[0].NumElements() != 1) { @@ -249,9 +253,10 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { index_(index) {} protected: - Status EvaluatePredicate(IteratorContext* ctx, - const std::vector& element, - bool* out_matched) const override { + Status EvaluatePredicate( + IteratorContext* ctx, + InstantiatedCapturedFunction* instantiated_captured_function, + const std::vector& element, bool* out_matched) const override { const Tensor& predicate = element[index_]; if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { return errors::InvalidArgument( diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc index 2fada22a21..3af8162137 100644 --- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc @@ -122,7 +122,8 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { Status 
Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } Status GetNextInternal(IteratorContext* ctx, @@ -238,8 +239,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(mu_) { return MakeIteratorFromInputElement( ctx, captured_func_inputs_, element_index_++, - dataset()->captured_func_.get(), prefix(), - ¤t_element_iterator_); + *instantiated_captured_func_, prefix(), ¤t_element_iterator_); } Status BuildCurrentElementIteratorLocked(OpKernelContext* ctx) @@ -257,6 +257,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr input_impl_ GUARDED_BY(mu_); std::unique_ptr current_element_iterator_ GUARDED_BY(mu_); std::vector captured_func_inputs_ GUARDED_BY(mu_); + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc index 71a36314a0..c7d8cfce90 100644 --- a/tensorflow/core/kernels/data/generator_dataset_op.cc +++ b/tensorflow/core/kernels/data/generator_dataset_op.cc @@ -73,7 +73,8 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { ~Iterator() override { if (!finalized_) { std::vector ignored; - Status s = dataset()->finalize_func_->RunInstantiated(state_, &ignored); + Status s = + instantiated_finalize_func_->RunInstantiated(state_, &ignored); if (!s.ok()) { LOG(WARNING) << "Error occurred when finalizing GeneratorDataset iterator: " @@ -83,11 +84,14 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { } Status Initialize(IteratorContext* ctx) override { - TF_RETURN_IF_ERROR(dataset()->init_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR(dataset()->next_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(ctx)); 
TF_RETURN_IF_ERROR( - dataset()->init_func_->RunWithBorrowedArgs(ctx, {}, &state_)); + dataset()->init_func_->Instantiate(ctx, &instantiated_init_func_)); + TF_RETURN_IF_ERROR( + dataset()->next_func_->Instantiate(ctx, &instantiated_next_func_)); + TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate( + ctx, &instantiated_finalize_func_)); + TF_RETURN_IF_ERROR( + instantiated_init_func_->RunWithBorrowedArgs(ctx, {}, &state_)); return Status::OK(); } @@ -101,8 +105,8 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { return Status::OK(); } - Status s = - dataset()->next_func_->RunWithBorrowedArgs(ctx, state_, out_tensors); + Status s = instantiated_next_func_->RunWithBorrowedArgs(ctx, state_, + out_tensors); if (s.ok()) { *end_of_sequence = false; } else if (errors::IsOutOfRange(s)) { @@ -115,7 +119,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { // finalize function. std::vector ignored; TF_RETURN_IF_ERROR( - dataset()->finalize_func_->RunInstantiated(state_, &ignored)); + instantiated_finalize_func_->RunInstantiated(state_, &ignored)); finalized_ = true; } return s; @@ -125,6 +129,9 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { mutex mu_; bool finalized_ GUARDED_BY(mu_) = false; std::vector state_ GUARDED_BY(mu_); + std::unique_ptr instantiated_init_func_; + std::unique_ptr instantiated_next_func_; + std::unique_ptr instantiated_finalize_func_; }; const std::unique_ptr init_func_; diff --git a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc index d6ee42a7c6..9cfcbbf8f6 100644 --- a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc @@ -192,11 +192,14 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - 
TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR( - dataset()->captured_finalize_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate( + ctx, &instantiated_key_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate( + ctx, &instantiated_init_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate( + ctx, &instantiated_reduce_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_finalize_func_->Instantiate( + ctx, &instantiated_finalize_func_)); return Status::OK(); } @@ -214,9 +217,8 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { if (!end_of_input_) { // Run the key function on the input element. std::vector key_func_output; - TF_RETURN_IF_ERROR( - dataset()->captured_key_func_->RunWithBorrowedArgs( - ctx, next_input_element, &key_func_output)); + TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs( + ctx, next_input_element, &key_func_output)); if (key_func_output.size() != 1 || key_func_output[0].dtype() != DT_INT64 || @@ -230,7 +232,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { if (states_.find(key) == states_.end()) { // Run the init function to create the initial state. 
std::vector init_func_output; - TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Run( + TF_RETURN_IF_ERROR(instantiated_init_func_->Run( ctx, std::move(key_func_output), &init_func_output)); states_[key] = init_func_output; } @@ -244,7 +246,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { std::back_inserter(args)); std::vector reduce_func_output; - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run( + TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run( ctx, std::move(args), &reduce_func_output)); states_[key] = reduce_func_output; } else { @@ -260,9 +262,8 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } - TF_RETURN_IF_ERROR( - dataset()->captured_finalize_func_->RunWithBorrowedArgs( - ctx, states_[keys_[keys_index_++]], out_tensors)); + TF_RETURN_IF_ERROR(instantiated_finalize_func_->RunWithBorrowedArgs( + ctx, states_[keys_[keys_index_++]], out_tensors)); *end_of_sequence = false; return Status::OK(); } @@ -380,6 +381,10 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { std::map> states_ GUARDED_BY(mu_); std::vector keys_ GUARDED_BY(mu_); int64 keys_index_ GUARDED_BY(mu_) = 0; + std::unique_ptr instantiated_key_func_; + std::unique_ptr instantiated_init_func_; + std::unique_ptr instantiated_reduce_func_; + std::unique_ptr instantiated_finalize_func_; }; const NameAttrList& key_func() const { return captured_key_func_->func(); } diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc index 8b417bb1c2..2ea59bee5c 100644 --- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc @@ -176,10 +176,12 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - 
TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx)); - TF_RETURN_IF_ERROR( - dataset()->captured_window_size_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate( + ctx, &instantiated_key_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate( + ctx, &instantiated_reduce_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Instantiate( + ctx, &instantiated_window_size_func_)); return Status::OK(); } @@ -216,9 +218,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { // Run the key function on the input element to identify its // group. std::vector key_func_output; - TF_RETURN_IF_ERROR( - dataset()->captured_key_func_->RunWithBorrowedArgs( - ctx, next_input_element, &key_func_output)); + TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs( + ctx, next_input_element, &key_func_output)); if (key_func_output.size() != 1 || key_func_output[0].dtype() != DT_INT64 || @@ -233,7 +234,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { // Run the window size function on the key to identify its // window size. 
std::vector window_size_func_output; - TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run( + TF_RETURN_IF_ERROR(instantiated_window_size_func_->Run( ctx, std::move(key_func_output), &window_size_func_output)); if (window_size_func_output.size() != 1 || @@ -448,8 +449,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { std::vector args( {std::move(key_arg), std::move(group_dataset_arg)}); std::vector return_values; - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run( - ctx, std::move(args), &return_values)); + TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run(ctx, std::move(args), + &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT && @@ -478,6 +479,10 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { std::map>> groups_ GUARDED_BY(mu_); std::unique_ptr current_group_iterator_ GUARDED_BY(mu_); std::map window_sizes_ GUARDED_BY(mu_); + std::unique_ptr instantiated_key_func_; + std::unique_ptr instantiated_reduce_func_; + std::unique_ptr + instantiated_window_size_func_; }; Status OtherArgumentsNodeAndType( diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc index 0aa802b874..91c298ce9a 100644 --- a/tensorflow/core/kernels/data/interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc @@ -149,7 +149,8 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) { @@ -195,7 +196,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { if (!end_of_input_) { TF_RETURN_IF_ERROR(MakeIteratorFromInputElement( ctx, args_list_[cycle_index_], cycle_index_, - 
dataset()->captured_func_.get(), prefix(), + *instantiated_captured_func_, prefix(), ¤t_elements_[cycle_index_])); ++num_open_; } @@ -281,7 +282,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { &args_list_[idx][i])); } TF_RETURN_IF_ERROR(MakeIteratorFromInputElement( - ctx, args_list_[idx], idx, dataset()->captured_func_.get(), + ctx, args_list_[idx], idx, *instantiated_captured_func_, prefix(), ¤t_elements_[idx])); TF_RETURN_IF_ERROR( RestoreInput(ctx, reader, current_elements_[idx])); @@ -301,6 +302,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { int64 block_index_ GUARDED_BY(mu_) = 0; bool end_of_input_ GUARDED_BY(mu_) = false; size_t num_open_ GUARDED_BY(mu_) = 0; + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 83896219a3..71d3335452 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -218,7 +218,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } Status GetNextInternal(IteratorContext* ctx, @@ -375,7 +376,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::vector input_element) { std::shared_ptr> return_values( new std::vector()); - dataset()->captured_func_->RunAsync( + instantiated_captured_func_->RunAsync( ctx.get(), std::move(input_element), return_values.get(), [this, ctx, result, return_values, offset](Status status) { Callback(ctx, result, return_values, offset, status); @@ -672,6 +673,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::deque> batch_results_ GUARDED_BY(mu_); std::unique_ptr runner_thread_ 
GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index f112e1dc43..5b891b4fd5 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -122,7 +122,8 @@ class MapDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } Status GetNextInternal(IteratorContext* ctx, @@ -142,7 +143,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { // TODO(mrry): Avoid blocking a threadpool thread. We will need to // stack-rip the iterators and use async kernels. Status s = - dataset()->captured_func_->Run(ctx, std::move(args), out_tensors); + instantiated_captured_func_->Run(ctx, std::move(args), out_tensors); if (errors::IsOutOfRange(s)) { // `f` may deliberately raise `errors::OutOfRange` to indicate // that we should terminate the iteration early. 
@@ -167,6 +168,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { private: std::unique_ptr input_impl_; + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 9cd46bf5dd..448cc93a8c 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -247,7 +247,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { AddConstantParameter(ctx, "parallelism", dataset()->cycle_length_); TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } // It is implemented so that it matches the deterministic interleave @@ -685,7 +686,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { worker_thread_states_[thread_index].iterator_creation_status = MakeIteratorFromInputElement( ctx.get(), worker_thread_states_[thread_index].input, - thread_index, dataset()->captured_func_.get(), prefix(), + thread_index, *instantiated_captured_func_, prefix(), &worker_thread_states_[thread_index].iterator); iterator_creation_status = worker_thread_states_[thread_index].iterator_creation_status; @@ -919,7 +920,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr iterator; Status s = MakeIteratorFromInputElement( ctx, worker_thread_states_[index].input, index, - dataset()->captured_func_.get(), prefix(), &iterator); + *instantiated_captured_func_, prefix(), &iterator); TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, iterator)); worker_thread_states_[index].iterator.swap(iterator); } @@ -1047,6 +1048,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { // threads have exited before any other 
members are deallocated. // TODO(b/65178177): Avoid allocating additional threads. std::vector> worker_threads_ GUARDED_BY(mu_); + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; @@ -1254,7 +1256,8 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_); TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } Status GetNextInternal(IteratorContext* ctx, @@ -1490,7 +1493,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { if (!end_of_input_) { Status status = MakeIteratorFromInputElement( ctx.get(), args_list_[cycle_index_], cycle_index_, - dataset()->captured_func_.get(), prefix(), + *instantiated_captured_func_, prefix(), &current_elements_[cycle_index_]); if (!status.ok()) { invocation_results_.emplace_back(new InvocationResult()); @@ -1599,7 +1602,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { &args_list_[idx][i])); } TF_RETURN_IF_ERROR(MakeIteratorFromInputElement( - ctx, args_list_[idx], idx, dataset()->captured_func_.get(), + ctx, args_list_[idx], idx, *instantiated_captured_func_, prefix(), &current_elements_[idx])); TF_RETURN_IF_ERROR( RestoreInput(ctx, reader, current_elements_[idx])); @@ -1659,6 +1662,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { // Identifies whether background activity should be cancelled. 
bool cancelled_ GUARDED_BY(mu_) = false; + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 6abe6c8338..822f06be9e 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -85,29 +85,11 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - auto init_func = [this](IteratorContext* ctx) { - return captured_func_->Instantiate(ctx); - }; - const string& new_prefix = strings::StrCat(prefix, "::ParallelMap"); - ParallelMapIteratorFunction map_func = - [this, new_prefix](IteratorContext* ctx, - std::vector input_element, - std::vector* result, StatusCallback done) { - captured_func_->RunAsync(ctx, std::move(input_element), result, - std::move(done), new_prefix); - }; - if (!use_inter_op_parallelism_) { - map_func = [map_func]( - IteratorContext* ctx, std::vector input_element, - std::vector* result, StatusCallback done) { - (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element), - result, std::move(done))); - }; - } - + std::unique_ptr parallel_map_dataset_functor( + new ParallelMapDatasetFunctor(this, new_prefix)); return NewParallelMapIterator({this, new_prefix}, input_, - std::move(init_func), std::move(map_func), + std::move(parallel_map_dataset_functor), num_parallel_calls_); } @@ -169,6 +151,39 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { } private: + class ParallelMapDatasetFunctor : public ParallelMapFunctor { + public: + ParallelMapDatasetFunctor(const Dataset* dataset, const string& prefix) + : dataset_(dataset), prefix_(prefix) {} + + Status InitFunc(IteratorContext* ctx) override { + return dataset_->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); + } + + void MapFunc(IteratorContext* ctx, 
std::vector input_element, + std::vector* result, StatusCallback done) override { + auto map_func = [this](IteratorContext* ctx, + std::vector input_element, + std::vector* result, + StatusCallback done) { + instantiated_captured_func_->RunAsync( + ctx, std::move(input_element), result, std::move(done), prefix_); + }; + if (!dataset_->use_inter_op_parallelism_) { + (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element), + result, std::move(done))); + } else { + map_func(ctx, std::move(input_element), result, std::move(done)); + } + } + + private: + const Dataset* dataset_; + const string prefix_; + std::unique_ptr instantiated_captured_func_; + }; + const DatasetBase* const input_; const NameAttrList func_; const int32 num_parallel_calls_; diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index 5f6052ce83..4f8e0489de 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -31,12 +31,11 @@ class ParallelMapIterator : public DatasetBaseIterator { explicit ParallelMapIterator( const typename DatasetBaseIterator::BaseParams& params, const DatasetBase* input_dataset, - std::function init_func, - ParallelMapIteratorFunction map_func, int32 num_parallel_calls) + std::unique_ptr parallel_map_functor, + int32 num_parallel_calls) : DatasetBaseIterator(params), input_dataset_(input_dataset), - init_func_(std::move(init_func)), - map_func_(std::move(map_func)), + parallel_map_functor_(std::move(parallel_map_functor)), num_parallel_calls_(num_parallel_calls) {} ~ParallelMapIterator() override { @@ -77,10 +76,7 @@ class ParallelMapIterator : public DatasetBaseIterator { } TF_RETURN_IF_ERROR( input_dataset_->MakeIterator(ctx, prefix(), &input_impl_)); - if (init_func_) { - TF_RETURN_IF_ERROR(init_func_(ctx)); - } - return Status::OK(); + return parallel_map_functor_->InitFunc(ctx); } Status GetNextInternal(IteratorContext* 
ctx, std::vector* out_tensors, @@ -226,8 +222,8 @@ class ParallelMapIterator : public DatasetBaseIterator { CallCompleted(result); }; - map_func_(ctx.get(), std::move(input_element), &result->return_values, - std::move(done)); + parallel_map_functor_->MapFunc(ctx.get(), std::move(input_element), + &result->return_values, std::move(done)); } Status ProcessResult(const std::shared_ptr& result, @@ -323,8 +319,7 @@ class ParallelMapIterator : public DatasetBaseIterator { } const DatasetBase* const input_dataset_; // Not owned. - const std::function init_func_; - const ParallelMapIteratorFunction map_func_; + std::unique_ptr parallel_map_functor_; // Used for coordination between the main thread and the runner thread. mutex mu_; // Used for coordination between the main thread and the runner thread. In @@ -347,22 +342,14 @@ class ParallelMapIterator : public DatasetBaseIterator { } // namespace -std::unique_ptr NewParallelMapIterator( - const DatasetBaseIterator::BaseParams& params, - const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func, - int32 num_parallel_calls) { - return NewParallelMapIterator(params, input_dataset, nullptr, - std::move(map_func), num_parallel_calls); -} - std::unique_ptr NewParallelMapIterator( const DatasetBaseIterator::BaseParams& params, const DatasetBase* input_dataset, - std::function init_func, - ParallelMapIteratorFunction map_func, int32 num_parallel_calls) { - return std::unique_ptr( - new ParallelMapIterator(params, input_dataset, std::move(init_func), - std::move(map_func), num_parallel_calls)); + std::unique_ptr parallel_map_functor, + int32 num_parallel_calls) { + return std::unique_ptr(new ParallelMapIterator( + params, input_dataset, std::move(parallel_map_functor), + num_parallel_calls)); } } // namespace data diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h index dc26c5cf25..62e57e5335 100644 --- 
a/tensorflow/core/kernels/data/parallel_map_iterator.h +++ b/tensorflow/core/kernels/data/parallel_map_iterator.h @@ -22,30 +22,32 @@ limitations under the License. namespace tensorflow { namespace data { -// A function that transforms elements of one dataset into another -// asynchronously. The arguments are: -// 1. An `IteratorContext*` for the context in which the function should -// execute. -// 2. A `std::vector` containing the input element. -// 3. A `std::vector*` to which the function will write the result. -// 4. A `StatusCallback` that should be invoked when the function is complete. -using ParallelMapIteratorFunction = - std::function, - std::vector*, StatusCallback)>; - -// Returns a new iterator that applies `map_func` to the elements of -// `input_dataset` using the given degree of parallelism. `init_func` (if -// specified) will be executed when the iterator is initialized (see -// `IteratorBase::Initialize()`) and enables the user to specify error checking -// logic that can fail early. +class ParallelMapFunctor { + public: + virtual ~ParallelMapFunctor() {} + + // A function that runs when the Iterator is initialized. It enables the user + // to specify error checking logic that can fail early. + virtual Status InitFunc(IteratorContext* ctx) { return Status::OK(); } + + // A function that transforms elements of one dataset into another + // asynchronously. The arguments are: + // 1. An `IteratorContext*` for the context in which the function should + // execute. + // 2. A `std::vector` containing the input element. + // 3. A `std::vector*` to which the function will write the result. + // 4. A `StatusCallback` that should be invoked when the function is complete. + virtual void MapFunc(IteratorContext* ctx, std::vector input, + std::vector* output, + StatusCallback callback) = 0; +}; + +// Returns a new iterator that uses `parallel_map_functor` to apply `MapFunc` +// to the elements of `input_dataset` using the given degree of parallelism. 
std::unique_ptr NewParallelMapIterator( const DatasetBaseIterator::BaseParams& params, const DatasetBase* input_dataset, - std::function init_func, - ParallelMapIteratorFunction map_func, int32 num_parallel_calls); -std::unique_ptr NewParallelMapIterator( - const DatasetBaseIterator::BaseParams& params, - const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func, + std::unique_ptr parallel_map_functor, int32 num_parallel_calls); } // namespace data diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc index c28c06da62..32210ef677 100644 --- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc +++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc @@ -182,9 +182,80 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - auto map_fn = [this](IteratorContext* ctx, - std::vector input_element, - std::vector* result, StatusCallback done) { + std::unique_ptr parse_example_functor( + new ParseExampleFunctor(this)); + return NewParallelMapIterator( + {this, strings::StrCat(prefix, "::ParseExample")}, input_, + std::move(parse_example_functor), num_parallel_calls_); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { + return "ParseExampleDatasetOp::Dataset"; + } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); + + Node* num_parallle_calls_node; + std::vector dense_defaults_nodes; + dense_defaults_nodes.reserve(dense_defaults_.size()); + + TF_RETURN_IF_ERROR( + b->AddScalar(num_parallel_calls_, &num_parallle_calls_node)); + + for (const 
Tensor& dense_default : dense_defaults_) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node)); + dense_defaults_nodes.emplace_back(node); + } + + AttrValue sparse_keys_attr; + AttrValue dense_keys_attr; + AttrValue sparse_types_attr; + AttrValue dense_attr; + AttrValue dense_shapes_attr; + + b->BuildAttrValue(sparse_keys_, &sparse_keys_attr); + b->BuildAttrValue(dense_keys_, &dense_keys_attr); + b->BuildAttrValue(sparse_types_, &sparse_types_attr); + b->BuildAttrValue(dense_types_, &dense_attr); + b->BuildAttrValue(dense_shapes_, &dense_shapes_attr); + + TF_RETURN_IF_ERROR(b->AddDataset(this, + { + {0, input_graph_node}, + {1, num_parallle_calls_node}, + }, + {{2, dense_defaults_nodes}}, + {{"sparse_keys", sparse_keys_attr}, + {"dense_keys", dense_keys_attr}, + {"sparse_types", sparse_types_attr}, + {"Tdense", dense_attr}, + {"dense_shapes", dense_shapes_attr}}, + output)); + return Status::OK(); + } + + private: + class ParseExampleFunctor : public ParallelMapFunctor { + public: + explicit ParseExampleFunctor(const Dataset* dataset) + : dataset_(dataset) {} + + void MapFunc(IteratorContext* ctx, std::vector input_element, + std::vector* result, StatusCallback done) override { (*ctx->runner())([this, ctx, input_element, result, done]() { thread::ThreadPool* device_threadpool = ctx->lib()->device()->tensorflow_cpu_worker_threads()->workers; @@ -196,7 +267,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { for (auto it = slice.begin(); it != slice.end(); it++) slice_vec.push_back(*it); } - example::FastParseExampleConfig config = config_; + example::FastParseExampleConfig config = dataset_->config_; // local copy of config_ for modification. 
auto stats_aggregator = ctx->stats_aggregator(); if (stats_aggregator) { @@ -206,43 +277,50 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { Status s = FastParseExample(config, slice_vec, {}, device_threadpool, &example_result); if (s.ok()) { - (*result).resize(key_to_output_index_.size()); - for (int d = 0; d < dense_keys_.size(); ++d) { - int output_index = key_to_output_index_.at(dense_keys_[d]); - CHECK(example_result.dense_values[d].dtype() == - output_dtypes()[output_index]) + (*result).resize(dataset_->key_to_output_index_.size()); + for (int d = 0; d < dataset_->dense_keys_.size(); ++d) { + int output_index = + dataset_->key_to_output_index_.at(dataset_->dense_keys_[d]); + DCHECK(example_result.dense_values[d].dtype() == + dataset_->output_dtypes()[output_index]) << "Got wrong type for FastParseExample return value " << d << " (expected " - << DataTypeString(output_dtypes()[output_index]) << ", got " + << DataTypeString(dataset_->output_dtypes()[output_index]) + << ", got " << DataTypeString(example_result.dense_values[d].dtype()) << ")."; - CHECK(output_shapes()[output_index].IsCompatibleWith( + DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith( example_result.dense_values[d].shape())) << "Got wrong shape for FastParseExample return value " << d << " (expected " - << output_shapes()[output_index].DebugString() << ", got " + << dataset_->output_shapes()[output_index].DebugString() + << ", got " << example_result.dense_values[d].shape().DebugString() << ")."; (*result)[output_index] = example_result.dense_values[d]; } - for (int d = 0; d < sparse_keys_.size(); ++d) { + for (int d = 0; d < dataset_->sparse_keys_.size(); ++d) { Tensor serialized_sparse = Tensor(DT_VARIANT, TensorShape({3})); auto serialized_sparse_t = serialized_sparse.vec(); serialized_sparse_t(0) = example_result.sparse_indices[d]; serialized_sparse_t(1) = example_result.sparse_values[d]; serialized_sparse_t(2) = example_result.sparse_shapes[d]; - int output_index 
= key_to_output_index_.at(sparse_keys_[d]); - CHECK(serialized_sparse.dtype() == output_dtypes()[output_index]) + int output_index = + dataset_->key_to_output_index_.at(dataset_->sparse_keys_[d]); + DCHECK(serialized_sparse.dtype() == + dataset_->output_dtypes()[output_index]) << "Got wrong type for FastParseExample return value " << d << " (expected " - << DataTypeString(output_dtypes()[output_index]) << ", got " - << DataTypeString(serialized_sparse.dtype()) << ")."; - CHECK(output_shapes()[output_index].IsCompatibleWith( + << DataTypeString(dataset_->output_dtypes()[output_index]) + << ", got " << DataTypeString(serialized_sparse.dtype()) + << ")."; + DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith( serialized_sparse.shape())) << "Got wrong shape for FastParseExample return value " << d << " (expected " - << output_shapes()[output_index].DebugString() << ", got " - << serialized_sparse.shape().DebugString() << ")."; + << dataset_->output_shapes()[output_index].DebugString() + << ", got " << serialized_sparse.shape().DebugString() + << ")."; (*result)[output_index] = serialized_sparse; } // TODO(b/111553342): User provided tags instead of fixed tag. 
@@ -268,73 +346,12 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { } done(s); }); - }; - - return NewParallelMapIterator( - {this, strings::StrCat(prefix, "::ParseExample")}, input_, - std::move(map_fn), num_parallel_calls_); - } - - const DataTypeVector& output_dtypes() const override { - return output_types_; - } - - const std::vector& output_shapes() const override { - return output_shapes_; - } - - string DebugString() const override { - return "ParseExampleDatasetOp::Dataset"; - } - - protected: - Status AsGraphDefInternal(SerializationContext* ctx, - DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); - - Node* num_parallle_calls_node; - std::vector dense_defaults_nodes; - dense_defaults_nodes.reserve(dense_defaults_.size()); - - TF_RETURN_IF_ERROR( - b->AddScalar(num_parallel_calls_, &num_parallle_calls_node)); - - for (const Tensor& dense_default : dense_defaults_) { - Node* node; - TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node)); - dense_defaults_nodes.emplace_back(node); } - AttrValue sparse_keys_attr; - AttrValue dense_keys_attr; - AttrValue sparse_types_attr; - AttrValue dense_attr; - AttrValue dense_shapes_attr; - - b->BuildAttrValue(sparse_keys_, &sparse_keys_attr); - b->BuildAttrValue(dense_keys_, &dense_keys_attr); - b->BuildAttrValue(sparse_types_, &sparse_types_attr); - b->BuildAttrValue(dense_types_, &dense_attr); - b->BuildAttrValue(dense_shapes_, &dense_shapes_attr); - - TF_RETURN_IF_ERROR(b->AddDataset(this, - { - {0, input_graph_node}, - {1, num_parallle_calls_node}, - }, - {{2, dense_defaults_nodes}}, - {{"sparse_keys", sparse_keys_attr}, - {"dense_keys", dense_keys_attr}, - {"sparse_types", sparse_types_attr}, - {"Tdense", dense_attr}, - {"dense_shapes", dense_shapes_attr}}, - output)); - return Status::OK(); - } + private: + const Dataset* dataset_; + }; - private: const DatasetBase* const input_; 
const std::vector dense_defaults_; const std::vector sparse_keys_; diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc index dbe31f37b8..d9fdd59bf0 100644 --- a/tensorflow/core/kernels/data/scan_dataset_op.cc +++ b/tensorflow/core/kernels/data/scan_dataset_op.cc @@ -144,7 +144,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate(ctx); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); } Status GetNextInternal(IteratorContext* ctx, @@ -169,8 +170,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { state_and_output.reserve(dataset()->state_types_.size() + output_dtypes().size()); - Status s = dataset()->captured_func_->Run(ctx, std::move(args), - &state_and_output); + Status s = instantiated_captured_func_->Run(ctx, std::move(args), + &state_and_output); if (s.ok()) { state_.clear(); size_t i = 0; @@ -247,6 +248,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); std::vector state_ GUARDED_BY(mu_); + std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; -- GitLab From 6d67ba41f566e963e2c061ca7df63edad89e1fca Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 18 Sep 2018 18:56:55 +0300 Subject: [PATCH 0327/1357] Work out the endianness statically. --- tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h index 5b42de4c5a..484cc4d6f5 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -20,12 +20,12 @@ limitations under the License. 
namespace tensorflow { +constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + class ByteSwapper { public: ByteSwapper(bool big_endian) { - int x = 1; - bool is_little_endian = (*(char *)&x == 1); - swap_ = big_endian == is_little_endian; + swap_ = big_endian == kLittleEndian; } inline void SwapIfRequiredInt16(int16_t *x) const { -- GitLab From 0c8a8289da120ee353c4fba5decb0bea9014e0a7 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 18 Sep 2018 09:28:42 -0700 Subject: [PATCH 0328/1357] Extend template expansion support for arithmetic expressions. PiperOrigin-RevId: 213462334 --- tensorflow/python/autograph/pyct/templates.py | 11 +++++++++++ tensorflow/python/autograph/pyct/templates_test.py | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tensorflow/python/autograph/pyct/templates.py b/tensorflow/python/autograph/pyct/templates.py index 68c2a35fac..1bf0515745 100644 --- a/tensorflow/python/autograph/pyct/templates.py +++ b/tensorflow/python/autograph/pyct/templates.py @@ -109,6 +109,7 @@ class ReplaceTransformer(gast.NodeTransformer): if not node.ctx: raise ValueError('node %s is missing ctx value' % node) + # TODO(mdan): Rewrite _check and _set using a separate transformer. 
def _check_inner_children_have_context(self, node): if isinstance(node, gast.Attribute): self._check_inner_children_have_context(node.value) @@ -131,6 +132,11 @@ class ReplaceTransformer(gast.NodeTransformer): self._check_inner_children_have_context(node.upper) if node.step: self._check_inner_children_have_context(node.step) + elif isinstance(node, gast.BinOp): + self._check_inner_children_have_context(node.left) + self._check_inner_children_have_context(node.right) + elif isinstance(node, gast.UnaryOp): + self._check_inner_children_have_context(node.operand) elif isinstance(node, gast.Name): self._check_has_context(node) elif isinstance(node, (gast.Str, gast.Num)): @@ -166,6 +172,11 @@ class ReplaceTransformer(gast.NodeTransformer): elif isinstance(node, gast.Subscript): self._set_inner_child_context(node.value, ctx) self._check_inner_children_have_context(node.slice) + elif isinstance(node, gast.BinOp): + self._check_inner_children_have_context(node.left) + self._check_inner_children_have_context(node.right) + elif isinstance(node, gast.UnaryOp): + self._check_inner_children_have_context(node.operand) elif isinstance(node, (gast.Str, gast.Num)): pass else: diff --git a/tensorflow/python/autograph/pyct/templates_test.py b/tensorflow/python/autograph/pyct/templates_test.py index 66268cfaad..078d9a149b 100644 --- a/tensorflow/python/autograph/pyct/templates_test.py +++ b/tensorflow/python/autograph/pyct/templates_test.py @@ -132,6 +132,18 @@ class TemplatesTest(test.TestCase): self.assertIsInstance(node.body[0].targets[0].elts[0].ctx, gast.Store) self.assertIsInstance(node.body[0].targets[0].elts[1].ctx, gast.Store) + def test_replace_expression_context(self): + template = """ + def test_fn(foo): + foo + """ + + node = templates.replace( + template, foo=parser.parse_expression('a + 2 * b / -c'))[0] + self.assertIsInstance(node.body[0].ctx, gast.Load) + self.assertIsInstance(node.body[0].left.ctx, gast.Load) + self.assertIsInstance(node.body[0].right.left.right.ctx, 
gast.Load) + def test_replace_complex_context(self): template = """ def test_fn(foo): -- GitLab From 30f28a7f44f39cb8f24fde17252c3e2539c22bb0 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 18 Sep 2018 09:52:03 -0700 Subject: [PATCH 0329/1357] change per code style check --- tensorflow/core/util/mkl_util.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 5ea8f2ee47..387e5ee5a6 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ -#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#ifndef TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#define TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ #ifdef INTEL_MKL #include @@ -2040,8 +2040,8 @@ class MklPrimitiveFactory { /// Fuction to check whether primitive memory optimization is enabled static inline bool IsPrimitiveMemOptEnabled() { bool is_primitive_mem_opt_enabled = true; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", true, - &is_primitive_mem_opt_enabled)); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", + true, &is_primitive_mem_opt_enabled)); return is_primitive_mem_opt_enabled; } @@ -2098,7 +2098,7 @@ static inline memory::format get_desired_format(int channel, (channel % 8) == 0) { fmt_desired = is_2d ? memory::format::nChw8c - : memory::format::ncdhw; //not support avx2 for 3d yet. + : memory::format::ncdhw; // not support avx2 for 3d yet. } else { fmt_desired = is_2d ? 
memory::format::nchw : memory::format::ncdhw; } @@ -2210,7 +2210,8 @@ inline primitive FindOrCreateReorder(const memory* from, const memory* to) { // utility function to determine if it is conv 1x1 and stride != 1 // for purpose of temporarily disabling primitive reuse -inline bool IsConv1x1StrideNot1(memory::dims filter_dims, memory::dims strides) { +inline bool IsConv1x1StrideNot1(memory::dims filter_dims, + memory::dims strides) { if (filter_dims.size() != 4 || strides.size() != 2) return false; return ((filter_dims[2] == 1) && (filter_dims[3] == 1) && @@ -2221,4 +2222,4 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims, memory::dims strides) } // namespace tensorflow #endif // INTEL_MKL -#endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#endif // TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ -- GitLab From d0f6f7733f46d973326187ee4eafb6b9e94b25a1 Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Tue, 18 Sep 2018 10:51:19 -0700 Subject: [PATCH 0330/1357] Adding #error when compiling for MKL ML Only, this has been defeatured --- tensorflow/core/util/mkl_util.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 680211edff..cf7ffd8149 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -34,9 +34,8 @@ limitations under the License. 
#endif #ifdef INTEL_MKL_ML_ONLY -// Using pragma message since #warning doesn't work with all compilers -#pragma message("Compiling for INTEL MKL ML only will be deprecated soon.") -#pragma message("Please use MKL DNN (the default option for --config=mkl)") +#error \ + "Compiling for INTEL MKL ML only is no longer supported.Please use MKL DNN (the default option for --config=mkl)" #endif #ifdef INTEL_MKL_ML_ONLY -- GitLab From 14e9345a88b08f5d2a12f3f441b1d82c041d7ea3 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 18 Sep 2018 18:23:52 +0000 Subject: [PATCH 0331/1357] Avoid saving sensitive information in graph. --- .../ignite/kernels/ignite_dataset_ops.cc | 30 ++------- tensorflow/contrib/ignite/ops/dataset_ops.cc | 10 --- .../ignite/python/ops/ignite_dataset_ops.py | 18 +---- .../python/tests/ignite_dataset_test.py | 66 ++++++++++++++----- 4 files changed, 56 insertions(+), 68 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index e48fce4ed2..bdaed72387 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -125,35 +125,15 @@ class IgniteDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "page_size", &page_size)); - if (env_username) - username = string(env_username); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "username", &username)); + if (env_username) username = string(env_username); - if (env_password) - password = string(env_password); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "password", &password)); + if (env_password) password = string(env_password); - if (env_certfile) - certfile = string(env_certfile); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "certfile", &certfile)); + if (env_certfile) certfile = string(env_certfile); - if (env_keyfile) - keyfile = string(env_keyfile); - else - OP_REQUIRES_OK(ctx, - 
ParseScalarArgument(ctx, "keyfile", &keyfile)); + if (env_keyfile) keyfile = string(env_keyfile); - if (env_cert_password) - cert_password = string(env_cert_password); - else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", - &cert_password)); + if (env_cert_password) cert_password = string(env_cert_password); const Tensor* schema_tensor; OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc index 7d18df11aa..3d6fbe00e6 100644 --- a/tensorflow/contrib/ignite/ops/dataset_ops.cc +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -26,11 +26,6 @@ REGISTER_OP("IgniteDataset") .Input("local: bool") .Input("part: int32") .Input("page_size: int32") - .Input("username: string") - .Input("password: string") - .Input("certfile: string") - .Input("keyfile: string") - .Input("cert_password: string") .Input("schema: int32") .Input("permutation: int32") .Output("handle: variant") @@ -54,11 +49,6 @@ port: Ignite Thin Client Port. local: Local flag that defines that data should be fetched from local host only. part: Partition data should be fetched from. page_size: Page size for Ignite Thin Client. -username: Username to authenticate via Ignite Thin Client. -password: Password to authenticate via Ignite Thin Client. -certfile: SSL certificate to establish SSL connection. -keyfile: Private key file to establish SSL connection. -cert_password: SSL certificate password to establish SSL connection. schema: Internal structure that defines schema of cache objects. permutation: Internal structure that defines permutation of cache objects. 
)doc"); diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py index c0e24b1c69..7fc9e1fdd1 100644 --- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -732,18 +732,6 @@ class IgniteDataset(Dataset): self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part") self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32, name="page_size") - self.username = ops.convert_to_tensor("" if username is None else username, - dtype=dtypes.string, name="username") - self.password = ops.convert_to_tensor("" if password is None else password, - dtype=dtypes.string, name="password") - self.certfile = ops.convert_to_tensor("" if certfile is None else certfile, - dtype=dtypes.string, name="certfile") - self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile, - dtype=dtypes.string, name="keyfile") - self.cert_password = ops.convert_to_tensor("" if cert_password is None - else cert_password, - dtype=dtypes.string, - name="cert_password") self.schema = ops.convert_to_tensor(self.cache_type.to_flat(), dtype=dtypes.int32, name="schema") self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(), @@ -753,10 +741,8 @@ class IgniteDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.ignite_dataset(self.cache_name, self.host, self.port, self.local, self.part, - self.page_size, self.username, - self.password, self.certfile, - self.keyfile, self.cert_password, - self.schema, self.permutation) + self.page_size, self.schema, + self.permutation) @property def output_classes(self): diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py index 933e62b804..5d74617690 100644 --- a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py +++ 
b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py @@ -35,28 +35,60 @@ class IgniteDatasetTest(test.TestCase): """ def test_ignite_dataset_with_plain_client(self): + """Test Ignite Dataset with plain client. + """ + self._clear_env() ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300) - self.__check_dataset(ds) + self._check_dataset(ds) def test_ignite_dataset_with_ssl_client(self): - ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\ - certfile=os.path.dirname(os.path.realpath(__file__)) +\ - "/keystore/client.pem", cert_password="123456") - self.__check_dataset(ds) + """Test Ignite Dataset with ssl client. + """ + self._clear_env() + os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname( + os.path.realpath(__file__)) + "/keystore/client.pem" + os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456" + + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301, + certfile=os.environ["IGNITE_DATASET_CERTFILE"], + cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"]) + self._check_dataset(ds) def test_ignite_dataset_with_ssl_client_and_auth(self): - ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\ - certfile=os.path.dirname(os.path.realpath(__file__)) +\ - "/keystore/client.pem", cert_password="123456",\ - username="ignite", password="ignite") - self.__check_dataset(ds) + """Test Ignite Dataset with ssl client and authentication. 
+ """ + self._clear_env() + os.environ['IGNITE_DATASET_USERNAME'] = "ignite" + os.environ['IGNITE_DATASET_PASSWORD'] = "ignite" + os.environ['IGNITE_DATASET_CERTFILE'] = os.path.dirname( + os.path.realpath(__file__)) + "/keystore/client.pem" + os.environ['IGNITE_DATASET_CERT_PASSWORD'] = "123456" + + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302, + certfile=os.environ['IGNITE_DATASET_CERTFILE'], + cert_password=os.environ['IGNITE_DATASET_CERT_PASSWORD'], + username=os.environ['IGNITE_DATASET_USERNAME'], + password=os.environ['IGNITE_DATASET_PASSWORD']) + self._check_dataset(ds) + + def _clear_env(self): + """Clears environment variables used by Ignite Dataset. + """ + if 'IGNITE_DATASET_USERNAME' in os.environ: + del os.environ['IGNITE_DATASET_USERNAME'] + if 'IGNITE_DATASET_PASSWORD' in os.environ: + del os.environ['IGNITE_DATASET_PASSWORD'] + if 'IGNITE_DATASET_CERTFILE' in os.environ: + del os.environ['IGNITE_DATASET_CERTFILE'] + if 'IGNITE_DATASET_CERT_PASSWORD' in os.environ: + del os.environ['IGNITE_DATASET_CERT_PASSWORD'] - def __check_dataset(self, dataset): + def _check_dataset(self, dataset): """Checks that dataset provids correct data. 
""" - self.assertEquals(tf.int64, dataset.output_types['key']) - self.assertEquals(tf.string, dataset.output_types['val']['NAME']) - self.assertEquals(tf.int64, dataset.output_types['val']['VAL']) + self.assertEqual(tf.int64, dataset.output_types['key']) + self.assertEqual(tf.string, dataset.output_types['val']['NAME']) + self.assertEqual(tf.int64, dataset.output_types['val']['VAL']) it = dataset.make_one_shot_iterator() ne = it.get_next() @@ -66,11 +98,11 @@ class IgniteDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(ne) - self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\ + self.assertEqual({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\ rows[0]) - self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\ + self.assertEqual({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\ rows[1]) - self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\ + self.assertEqual({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\ rows[2]) if __name__ == "__main__": -- GitLab From effced8f591441e0706377e2b31debb96ee9203d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Kr=C3=B6ger?= Date: Tue, 18 Sep 2018 21:14:23 +0200 Subject: [PATCH 0332/1357] Moved example and changed wording --- tensorflow/python/data/ops/dataset_ops.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 2fc41a3b98..1b9ea2ed08 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1009,11 +1009,8 @@ class Dataset(object): def flat_map(self, map_func): """Maps `map_func` across this dataset and flattens the result. - `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since - `flat_map` produces a similar outputs as `tf.data.Dataset.interleave(cycle_length=1)` - Use `flat_map` if you want to make sure, that the order of your dataset stays the same. 
- For example, to implement unbatch: + For example, to flatten a dataset of batches into a dataset of their elements: ```python # NOTE: The following examples use `{ ... }` to represent the @@ -1023,6 +1020,10 @@ class Dataset(object): a.flat_map(lambda x: Dataset.from_tensor_slices(x)) == {[1,2,3,4,5,6,7,8,9,10]} ``` + + `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since + `flat_map` produces the same output as `tf.data.Dataset.interleave(cycle_length=1)` + Args: map_func: A function mapping a nested structure of tensors (having shapes and types defined by `self.output_shapes` and `self.output_types`) to a -- GitLab From a1ffaf3620801af2a7559b0ee393f962fb6ed7ae Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 18 Sep 2018 12:40:49 -0700 Subject: [PATCH 0333/1357] [SE] Restore int8x4 data types if that's the requested DataLayout for fused conv This broke in a recent refactoring. PiperOrigin-RevId: 213497416 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 38 ++++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 63ab367086..3a77ba769c 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -132,30 +132,39 @@ string ToString(cudnnStatus_t status) { } template -cudnnDataType_t GetCudnnDataType(); +cudnnDataType_t GetCudnnDataType( + dnn::DataLayout = dnn::DataLayout::kBatchDepthYX); template <> -cudnnDataType_t GetCudnnDataType() { +cudnnDataType_t GetCudnnDataType(dnn::DataLayout) { return CUDNN_DATA_DOUBLE; } template <> -cudnnDataType_t GetCudnnDataType() { +cudnnDataType_t GetCudnnDataType(dnn::DataLayout) { return CUDNN_DATA_FLOAT; } template <> -cudnnDataType_t GetCudnnDataType() { +cudnnDataType_t GetCudnnDataType(dnn::DataLayout) { return CUDNN_DATA_HALF; } template <> -cudnnDataType_t GetCudnnDataType() { - return CUDNN_DATA_INT8; +cudnnDataType_t 
GetCudnnDataType(dnn::DataLayout layout) { + switch (layout) { + case dnn::DataLayout::kYXDepthBatch: + case dnn::DataLayout::kYXBatchDepth: + case dnn::DataLayout::kBatchYXDepth: + case dnn::DataLayout::kBatchDepthYX: + return CUDNN_DATA_INT8; + case dnn::DataLayout::kBatchDepthYX4: + return CUDNN_DATA_INT8x4; + } } template <> -cudnnDataType_t GetCudnnDataType() { +cudnnDataType_t GetCudnnDataType(dnn::DataLayout) { return CUDNN_DATA_INT32; } @@ -2518,12 +2527,15 @@ port::Status CudnnSupport::DoFusedConvolveImpl( "Relu or None activation."); } - CudnnTensorDescriptor conv_input_nd(conv_input_descriptor, - GetCudnnDataType()); - CudnnTensorDescriptor output_nd(output_descriptor, - GetCudnnDataType()); - CudnnFilterDescriptor filter(filter_descriptor, - GetCudnnDataType()); + CudnnTensorDescriptor conv_input_nd( + conv_input_descriptor, + GetCudnnDataType(conv_input_descriptor.layout())); + CudnnTensorDescriptor output_nd( + output_descriptor, + GetCudnnDataType(conv_input_descriptor.layout())); + CudnnFilterDescriptor filter( + filter_descriptor, + GetCudnnDataType(conv_input_descriptor.layout())); CudnnTensorDescriptor bias_nd(bias_descriptor, GetCudnnDataType()); CudnnConvolutionDescriptor conv(convolution_descriptor, GetCudnnDataType()); -- GitLab From 723242c800f237368e238fe03bd50516807e3402 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Tue, 18 Sep 2018 12:55:44 -0700 Subject: [PATCH 0334/1357] Link to readme for distribution strategy from distribute.py and package init file, so that folks looking at API documentation can find the readme as well. 
PiperOrigin-RevId: 213499832 --- tensorflow/contrib/distribute/__init__.py | 7 ++++++- tensorflow/python/training/distribute.py | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/distribute/__init__.py b/tensorflow/contrib/distribute/__init__.py index 350f81f60f..823fe6a917 100644 --- a/tensorflow/contrib/distribute/__init__.py +++ b/tensorflow/contrib/distribute/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Prototype of a distributed computation library for TF.""" +"""A distributed computation library for TF. + +See [tensorflow/contrib/distribute/README.md]( +https://www.tensorflow.org/code/tensorflow/contrib/distribute/README.md) +for overview and examples. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 21ca1735e0..419a9ec12b 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -195,6 +195,10 @@ class _SameScopeAgainContext(object): class DistributionStrategy(object): """A list of devices with a state & compute distribution policy. + See [tensorflow/contrib/distribute/README.md]( + https://www.tensorflow.org/code/tensorflow/contrib/distribute/README.md) + for overview and examples. + The intent is that you can write an algorithm in a stylized way and it will be usable with a variety of different `DistributionStrategy` implementations. Each descendant will implement a different strategy -- GitLab From e8be4d96dd4d3d9d6b12b778a5b8beee592a324a Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 18 Sep 2018 12:59:39 -0700 Subject: [PATCH 0335/1357] Only start_step/end_step on GradientTape if executing eagerly. This prevents creating a context where none is required. 
PiperOrigin-RevId: 213500408 --- tensorflow/python/eager/backprop.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 907234b0f8..50a6ce6324 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -725,7 +725,9 @@ class GradientTape(object): self._persistent = persistent self._watch_accessed_variables = watch_accessed_variables self._recording = False - context.context().start_step() + self._created_eagerly = context.executing_eagerly() + if self._created_eagerly: + context.context().start_step() def __enter__(self): """Enters a context inside which operations are recorded on this tape.""" @@ -755,7 +757,8 @@ class GradientTape(object): self._recording = False def __del__(self): - context.context().end_step() + if self._created_eagerly: + context.context().end_step() def watch(self, tensor): """Ensures that `tensor` is being traced by this tape. -- GitLab From 199cb7746c7ad92d7be344363d8276c45fc7a4b8 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 18 Sep 2018 13:05:23 -0700 Subject: [PATCH 0336/1357] Register FakeResourceUpdateOp for the right op Before this CL the PartiallyDeclusterPassTest.DontDuplicateResourceVarOps test was buggy, in that it wasn't testing what it was supposed to test. 
PiperOrigin-RevId: 213501558 --- .../compiler/jit/partially_decluster_pass_test.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc index 35872daa65..0feb73a89e 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc @@ -60,9 +60,9 @@ class FakeBinaryOp : public OpKernel { void Compute(OpKernelContext* ctx) override { CHECK(false); } }; -class FakeResourceVarUpdateOp : public OpKernel { +class FakeResourceUpdateOp : public OpKernel { public: - explicit FakeResourceVarUpdateOp(OpKernelConstruction* context) + explicit FakeResourceUpdateOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* ctx) override { CHECK(false); } @@ -74,10 +74,9 @@ REGISTER_KERNEL_BUILDER(Name("FakeBinary") .HostMemory("host_out"), FakeBinaryOp); -REGISTER_KERNEL_BUILDER(Name("FakeResourceVarUpdate") - .Device(DEVICE_CPU) - .HostMemory("something_else"), - FakeResourceVarUpdateOp); +REGISTER_KERNEL_BUILDER( + Name("FakeResourceUpdate").Device(DEVICE_CPU).HostMemory("something_else"), + FakeResourceUpdateOp); Status PartiallyDecluster(std::unique_ptr* graph) { FixupSourceAndSinkEdges(graph->get()); -- GitLab From 33170cc661f3838aa7d0d7fc19bb0c6ba4812a3c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 18 Sep 2018 13:28:38 -0700 Subject: [PATCH 0337/1357] Eliminate VisitableAllocator. The visitor pattern is used to allow pre-registration of memory for DMA access, e.g. for fast GPU/CPU i/o and for RDMA networking. The VisitableAllocator interface was introduced to support this use some time ago, prior to SubAllocators. Memory registration works best if it's done infrequently, on large pieces of memory, rather than on every piece that's dynamically allocated/freed. 
This usage pattern fits the SubAllocator better than a general Allocator. This change moves memory allocation visitor access to SubAllocator and eliminates the VisitableAllocator subclass of Allocator. This change also more rigorously enforces the requirement that all Visitors be declared prior to memory allocation beginning. This is accomplished by requiring that Visitors be provided to the SubAllocator constructor. This refactoring will ease an upcoming CL introducing NUMA specific CPU devices. It also should fix some performance pitfalls (e.g. accidental use of PoolAllocator) introduced by an earlier refactoring of ProcessState that was also in preparation for NUMA. It restores the default use of the cpu_allocator() value (i.e. no SubAllocator) by model executions that don't use allocation visitors (since visitor registration must precede the first allocation, hence can be detected at that time). PiperOrigin-RevId: 213505655 --- tensorflow/contrib/gdr/gdr_memory_manager.cc | 102 +++++------ tensorflow/contrib/verbs/rdma_mgr.cc | 81 +++------ tensorflow/contrib/verbs/rdma_mgr.h | 1 + tensorflow/contrib/verbs/verbs_server_lib.cc | 5 + tensorflow/core/BUILD | 1 - .../core/common_runtime/bfc_allocator.cc | 21 +-- .../core/common_runtime/bfc_allocator.h | 14 +- .../common_runtime/gpu/cuda_host_allocator.h | 12 +- .../common_runtime/gpu/gpu_bfc_allocator.cc | 17 +- .../common_runtime/gpu/gpu_bfc_allocator.h | 44 +++-- .../gpu/gpu_bfc_allocator_test.cc | 90 ++++++++-- .../gpu/gpu_cudamalloc_allocator.cc | 10 +- .../gpu/gpu_cudamalloc_allocator.h | 11 +- .../common_runtime/gpu/gpu_debug_allocator.cc | 20 +-- .../common_runtime/gpu/gpu_debug_allocator.h | 20 +-- .../gpu/gpu_debug_allocator_test.cc | 35 +++- .../core/common_runtime/gpu/gpu_device.cc | 64 ++++--- .../core/common_runtime/gpu/gpu_device.h | 9 +- .../common_runtime/gpu/gpu_process_state.cc | 161 +++++++++++------- .../common_runtime/gpu/gpu_process_state.h | 58 ++++--- 
.../common_runtime/gpu/pool_allocator_test.cc | 68 ++++++-- .../core/common_runtime/mkl_cpu_allocator.h | 50 +----- .../core/common_runtime/pool_allocator.cc | 45 ++--- .../core/common_runtime/pool_allocator.h | 27 +-- .../core/common_runtime/process_state.cc | 71 ++++++-- .../core/common_runtime/process_state.h | 15 +- .../core/common_runtime/renamed_device.h | 7 +- .../core/common_runtime/visitable_allocator.h | 79 --------- tensorflow/core/framework/allocator.cc | 20 ++- tensorflow/core/framework/allocator.h | 28 ++- tensorflow/core/framework/device_base.h | 10 +- tensorflow/core/framework/op_kernel.cc | 9 +- 32 files changed, 628 insertions(+), 577 deletions(-) delete mode 100644 tensorflow/core/common_runtime/visitable_allocator.h diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index 726f74c7b7..bb06f1c41c 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -138,6 +138,8 @@ class GdrMemoryManager : public RemoteMemoryManager { Device* device, DeviceContext* device_context, bool on_host, StatusCallback done) override; + static void RegMemVisitors(); + protected: Status CreateEndpoint(const string& host, const string& port, RdmaEndpointPtr& endpoint); @@ -183,35 +185,51 @@ class GdrMemoryManager : public RemoteMemoryManager { TF_DISALLOW_COPY_AND_ASSIGN(GdrMemoryManager); }; -// TODO(byronyi): remove this class and its registration when the default -// cpu_allocator() returns visitable allocator, or cpu_allocator() is no -// longer in use. 
-class BFCGdrAllocator : public BFCAllocator { - public: - BFCGdrAllocator() - : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, - true, "cpu_gdr_bfc") {} -}; -class BFCGdrAllocatorFactory : public AllocatorFactory { - public: - Allocator* CreateAllocator() override { return new BFCGdrAllocator; } - - virtual SubAllocator* CreateSubAllocator(int numa_node) { - return new BasicCPUAllocator(numa_node); - } -}; - -REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 102, BFCGdrAllocatorFactory); - GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) : host_(host), port_(port), listening_(nullptr, EndpointDeleter), stopped_(true), - next_key_(0) {} + next_key_(0) { + static std::once_flag flag; + std::call_once(flag, []() { RegMemVisitors(); }); +} GdrMemoryManager::~GdrMemoryManager() { close(epfd_); } +/*static*/ void GdrMemoryManager::RegMemVisitors() { + SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + GdrMemoryManager::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes); + }; + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); + +#if GOOGLE_CUDA + if (IsGDRAvailable()) { + int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; + + // Note we don't free allocated GPU memory so there is no free visitor + SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + 
GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); + LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; + } +#endif // GOOGLE_CUDA +} + Status GdrMemoryManager::Init() { epfd_ = epoll_create1(0); if (epfd_ == -1) { @@ -271,48 +289,6 @@ Status GdrMemoryManager::Init() { "cannot add server to epoll"); } - Allocator* allocators[] = { -#if GOOGLE_CUDA - GPUProcessState::singleton()->GetCUDAHostAllocator(0), -#endif // GOOGLE_CUDA - ProcessState::singleton()->GetCPUAllocator(0), - cpu_allocator(), - }; - - using namespace std::placeholders; - VisitableAllocator::Visitor alloc_visitor = - std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); - VisitableAllocator::Visitor free_visitor = - std::bind(&GdrMemoryManager::EvictMemoryRegion, this, _1, _2); - - std::set instrumented_; - - // Host memory allocators - for (Allocator* allocator : allocators) { - auto* visitable_allocator = dynamic_cast(allocator); - CHECK(visitable_allocator) - << "is not visitable for instrumentation" << allocator->Name(); - // Make sure we don't instrument the same allocator twice - if (instrumented_.find(allocator) == std::end(instrumented_)) { - visitable_allocator->AddAllocVisitor(alloc_visitor); - visitable_allocator->AddFreeVisitor(free_visitor); - instrumented_.insert(allocator); - LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); - } - } - -#if GOOGLE_CUDA - VisitableAllocator::Visitor cuda_alloc_visitor = - std::bind(&GdrMemoryManager::InsertMemoryRegion, this, _1, _2); - if (IsGDRAvailable()) { - // Note we don't free allocated GPU memory so there is no free visitor - int32_t bus_id = TryToReadNumaNode(listening_->verbs->device) + 1; - GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, - cuda_alloc_visitor); - LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; - } -#endif // GOOGLE_CUDA - return Status::OK(); } diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc 
index 3cb5e61fac..2784bf124c 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include "tensorflow/contrib/verbs/grpc_verbs_client.h" #include "tensorflow/contrib/verbs/verbs_service.pb.h" -#include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/pool_allocator.h" @@ -29,6 +28,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { @@ -256,74 +256,41 @@ void MRDeleter(ibv_mr* mr) { } } -// TODO(byronyi): remove this class and its registration when the default -// cpu_allocator() returns visitable allocator, or cpu_allocator() is no -// longer in use. 
-class BFCRdmaAllocator : public BFCAllocator { - public: - BFCRdmaAllocator() - : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, - true, "cpu_rdma_bfc") {} -}; -class BFCRdmaAllocatorFactory : public AllocatorFactory { - public: - Allocator* CreateAllocator() { return new BFCRdmaAllocator; } - - SubAllocator* CreateSubAllocator(int numa_node) { - return new BasicCPUAllocator(numa_node); - } -}; - -REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory); - void RdmaMgr::InitAllocators() { - RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; + static std::once_flag flag; + std::call_once( + flag, [this]() { RdmaMemoryMgr::Singleton().pd_ = rdma_adapter_->pd_; }); +} - Allocator* allocators[] = { -#if GOOGLE_CUDA - GPUProcessState::singleton()->GetCUDAHostAllocator(0), -#endif // GOOGLE_CUDA - ProcessState::singleton()->GetCPUAllocator(0), - cpu_allocator(), +/*static*/ void RdmaMgr::RegMemVisitors() { + SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().EvictMemoryRegion(ptr, num_bytes); }; - using namespace std::placeholders; - - std::set instrumented_; - - // Host memory allocators - for (Allocator* allocator : allocators) { - VisitableAllocator::Visitor alloc_visitor = - std::bind(&RdmaMemoryMgr::InsertMemoryRegion, - &RdmaMemoryMgr::Singleton(), _1, _2, allocator->Name()); - VisitableAllocator::Visitor free_visitor = std::bind( - &RdmaMemoryMgr::EvictMemoryRegion, &RdmaMemoryMgr::Singleton(), _1, _2); - - auto* visitable_allocator = dynamic_cast(allocator); - CHECK(visitable_allocator) - << "is not visitable for instrumentation" << allocator->Name(); - // Make sure we don't instrument the same allocator twice - if (instrumented_.find(allocator) == 
std::end(instrumented_)) { - visitable_allocator->AddAllocVisitor(alloc_visitor); - visitable_allocator->AddFreeVisitor(free_visitor); - instrumented_.insert(allocator); - LOG(INFO) << "Instrumenting CPU allocator " << allocator->Name(); - } - } + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); #if GOOGLE_CUDA if (IsGDRAvailable()) { // Note we don't free allocated GPU memory so there is no free visitor int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - char buf[8]; - sprintf(buf, "gpu"); - VisitableAllocator::Visitor cuda_alloc_visitor = - std::bind(&RdmaMemoryMgr::InsertMemoryRegion, - &RdmaMemoryMgr::Singleton(), _1, _2, std::string(buf)); - + SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, + size_t num_bytes) { + RdmaMemoryMgr::Singleton().InsertMemoryRegion( + ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; } #endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index 9fffc335bb..74b92cc9a6 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -39,6 +39,7 @@ class RdmaMgr { void SetupChannels(); bool ConnectivityCheck(); void InitAllocators(); + static void RegMemVisitors(); const string& local_worker() { return local_worker_; } private: diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc index 1a0b5028fe..61469686e4 100644 --- a/tensorflow/contrib/verbs/verbs_server_lib.cc +++ b/tensorflow/contrib/verbs/verbs_server_lib.cc @@ -76,8 +76,13 @@ Status VerbsServer::ChannelCacheFactory(const 
ServerDef& server_def, return Status::OK(); } +namespace { +std::once_call reg_mem_visitors_call; +} // namespace + Status VerbsServer::Init(ServiceInitFunction service_func, RendezvousMgrCreationFunction rendezvous_mgr_func) { + std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); }); Status s = GrpcServer::Init(service_func, rendezvous_mgr_func); { mutex_lock l(mu_); diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d55bd8d7ed..9bcf5b0865 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2783,7 +2783,6 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/step_stats_collector.h", "common_runtime/threadpool_device.h", "common_runtime/tracing_device.h", - "common_runtime/visitable_allocator.h", "common_runtime/process_state.h", "common_runtime/pool_allocator.h", "graph/gradients.h", diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 84c6285bbe..3843ea9e60 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -31,7 +31,7 @@ namespace tensorflow { BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, bool allow_growth, const string& name) - : suballocator_(sub_allocator), + : sub_allocator_(sub_allocator), name_(name), free_chunks_list_(kInvalidChunkHandle), next_allocation_id_(1) { @@ -72,7 +72,7 @@ BFCAllocator::~BFCAllocator() { VLOG(2) << "Number of regions allocated: " << region_manager_.regions().size(); for (const auto& region : region_manager_.regions()) { - suballocator_->Free(region.ptr(), region.memory_size()); + sub_allocator_->Free(region.ptr(), region.memory_size()); } for (BinNum b = 0; b < kNumBins; b++) { @@ -108,7 +108,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Try allocating. 
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); - void* mem_addr = suballocator_->Alloc(alignment, bytes); + void* mem_addr = sub_allocator_->Alloc(alignment, bytes); if (mem_addr == nullptr && !started_backpedal_) { // Only backpedal once. started_backpedal_ = true; @@ -119,7 +119,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { while (mem_addr == nullptr) { bytes = RoundedBytes(bytes * kBackpedalFactor); if (bytes < rounded_bytes) break; - mem_addr = suballocator_->Alloc(alignment, bytes); + mem_addr = sub_allocator_->Alloc(alignment, bytes); } } @@ -158,10 +158,6 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) { // Insert the chunk into the right bin. InsertFreeChunkIntoBin(h); - // Invoke visitors on newly allocated region. - for (const auto& visitor : region_visitors_) { - visitor(mem_addr, bytes); - } return true; } @@ -490,15 +486,6 @@ void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) { InsertFreeChunkIntoBin(coalesced_chunk); } -void BFCAllocator::AddAllocVisitor(Visitor visitor) { - VLOG(1) << "AddVisitor"; - mutex_lock l(lock_); - region_visitors_.push_back(visitor); - for (const auto& region : region_manager_.regions()) { - visitor(region.ptr(), region.memory_size()); - } -} - bool BFCAllocator::TracksAllocationSizes() { return true; } size_t BFCAllocator::RequestedSize(const void* ptr) { diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index 20e1dab1d5..364071e066 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -23,7 +23,7 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/allocator_retry.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/macros.h" @@ -42,7 +42,7 @@ namespace tensorflow { // coalescing. One assumption we make is that the process using this // allocator owns pretty much all of the memory, and that nearly // all requests to allocate memory go through this interface. -class BFCAllocator : public VisitableAllocator { +class BFCAllocator : public Allocator { public: // Takes ownership of sub_allocator. BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, @@ -55,11 +55,6 @@ class BFCAllocator : public VisitableAllocator { const AllocationAttributes& allocation_attr) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - - // Does nothing, because memory is never freed. - void AddFreeVisitor(Visitor visitor) override {} - bool TracksAllocationSizes() override; size_t RequestedSize(const void* ptr) override; @@ -423,7 +418,7 @@ class BFCAllocator : public VisitableAllocator { // of the available memory. bool started_backpedal_ = false; - std::unique_ptr suballocator_; + std::unique_ptr sub_allocator_; string name_; // Structures mutable after construction @@ -435,9 +430,6 @@ class BFCAllocator : public VisitableAllocator { // Pointer to head of linked list of free Chunks ChunkHandle free_chunks_list_ GUARDED_BY(lock_); - // Called once on each region, ASAP. - std::vector region_visitors_ GUARDED_BY(lock_); - // Counter containing the next unique identifier to assign to a // newly-created chunk. 
int64 next_allocation_id_ GUARDED_BY(lock_); diff --git a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h index 636cd43575..6bd29ef775 100644 --- a/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h +++ b/tensorflow/core/common_runtime/gpu/cuda_host_allocator.h @@ -26,8 +26,12 @@ namespace tensorflow { class CUDAHostAllocator : public SubAllocator { public: // Note: stream_exec cannot be null. - explicit CUDAHostAllocator(se::StreamExecutor* stream_exec) - : stream_exec_(stream_exec) { + explicit CUDAHostAllocator(se::StreamExecutor* stream_exec, int numa_node, + const std::vector& alloc_visitors, + const std::vector& free_visitors) + : SubAllocator(alloc_visitors, free_visitors), + stream_exec_(stream_exec), + numa_node_(numa_node) { CHECK(stream_exec_ != nullptr); } ~CUDAHostAllocator() override {} @@ -39,19 +43,23 @@ class CUDAHostAllocator : public SubAllocator { if (ptr == nullptr) { LOG(WARNING) << "could not allocate pinned host memory of size: " << num_bytes; + return ptr; } + VisitAlloc(ptr, numa_node_, num_bytes); } return ptr; } void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { + VisitFree(ptr, numa_node_, num_bytes); stream_exec_->HostMemoryDeallocate(ptr); } } private: se::StreamExecutor* stream_exec_; // not owned, non-null + const int numa_node_; TF_DISALLOW_COPY_AND_ASSIGN(CUDAHostAllocator); }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc index 2d4c8d0201..44ffce77a1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc @@ -22,18 +22,15 @@ limitations under the License. 
namespace tensorflow { -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const string& name) - : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {} +GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator, + size_t total_memory, const string& name) + : GPUBFCAllocator(sub_allocator, total_memory, GPUOptions(), name) {} -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, +GPUBFCAllocator::GPUBFCAllocator(GPUMemAllocator* sub_allocator, + size_t total_memory, const GPUOptions& gpu_options, const string& name) - : BFCAllocator( - new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), - gpu_options.per_process_gpu_memory_fraction() > 1.0 || - gpu_options.experimental().use_unified_memory()), - total_memory, gpu_options.allow_growth(), name) {} + : BFCAllocator(sub_allocator, total_memory, gpu_options.allow_growth(), + name) {} } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index f1cc2eace1..6b6de80734 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -31,28 +31,20 @@ limitations under the License. namespace tensorflow { -// A GPU memory allocator that implements a 'best-fit with coalescing' -// algorithm. -class GPUBFCAllocator : public BFCAllocator { - public: - // 'cuda_gpu_id' refers to the ID of the GPU device within - // the process and must reference a valid ID in the process. - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const string& name); - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const GPUOptions& gpu_options, const string& name); - virtual ~GPUBFCAllocator() {} - - TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); -}; - // Suballocator for GPU memory. 
class GPUMemAllocator : public SubAllocator { public: + // 'cuda_gpu_id' refers to the ID of the GPU device within + // the process and must reference a valid ID in the process. // Note: stream_exec cannot be null. - explicit GPUMemAllocator(se::StreamExecutor* stream_exec, - bool use_unified_memory) - : stream_exec_(stream_exec), use_unified_memory_(use_unified_memory) { + explicit GPUMemAllocator(se::StreamExecutor* stream_exec, CudaGpuId gpu_id, + bool use_unified_memory, + const std::vector& alloc_visitors, + const std::vector& free_visitors) + : SubAllocator(alloc_visitors, free_visitors), + stream_exec_(stream_exec), + gpu_id_(gpu_id), + use_unified_memory_(use_unified_memory) { CHECK(stream_exec_ != nullptr); } ~GPUMemAllocator() override {} @@ -65,12 +57,14 @@ class GPUMemAllocator : public SubAllocator { } else { ptr = stream_exec_->AllocateArray(num_bytes).opaque(); } + VisitAlloc(ptr, gpu_id_.value(), num_bytes); } return ptr; } void Free(void* ptr, size_t num_bytes) override { if (ptr != nullptr) { + VisitFree(ptr, gpu_id_.value(), num_bytes); if (use_unified_memory_) { stream_exec_->UnifiedMemoryDeallocate(ptr); } else { @@ -82,11 +76,25 @@ class GPUMemAllocator : public SubAllocator { private: se::StreamExecutor* stream_exec_; // not owned, non-null + const CudaGpuId gpu_id_; const bool use_unified_memory_ = false; TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator); }; +// A GPU memory allocator that implements a 'best-fit with coalescing' +// algorithm. 
+class GPUBFCAllocator : public BFCAllocator { + public: + GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory, + const string& name); + GPUBFCAllocator(GPUMemAllocator* sub_allocator, size_t total_memory, + const GPUOptions& gpu_options, const string& name); + ~GPUBFCAllocator() override {} + + TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); +}; + } // namespace tensorflow #endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_BFC_ALLOCATOR_H_ diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 67caeb3495..7112c3afd4 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -46,7 +47,11 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use, } TEST(GPUBFCAllocatorTest, NoDups) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); // Allocate a lot of raw pointers @@ -75,7 +80,11 @@ TEST(GPUBFCAllocatorTest, NoDups) { } TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator 
a(sub_allocator, 1 << 30, "GPU_0_bfc"); // Allocate 256 raw pointers of sizes between 100 bytes and about // a meg random::PhiloxRandom philox(123, 17); @@ -133,7 +142,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { } TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); float* first_ptr = a.Allocate(1024); @@ -168,18 +181,30 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { } TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); float* ptr = a.Allocate(0); EXPECT_EQ(nullptr, ptr); } TEST(GPUBFCAllocatorTest, TracksSizes) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, 
a.AllocatedSize(t1)); @@ -187,8 +212,12 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { } TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) { + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); // Configure a 1MiB byte limit - GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc"); + GPUBFCAllocator a(sub_allocator, 1 << 20, "GPU_0_bfc"); float* first_ptr = a.Allocate(1 << 6); float* second_ptr = a.Allocate(1 << 20); @@ -203,7 +232,11 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { options.set_allow_growth(true); // Max of 2GiB, but starts out small. - GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1LL << 31, "GPU_0_bfc"); // Allocate 10 raw pointers of sizes between 100 bytes and about // 64 megs. 
@@ -264,8 +297,15 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { } TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { - GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); - GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1UL << 60, "GPU_0_bfc"); + sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator b(sub_allocator, 1UL << 60, "GPU_0_bfc"); void* amem = a.AllocateRaw(1, 1); void* bmem = b.AllocateRaw(1, 1 << 30); a.DeallocateRaw(amem); @@ -273,7 +313,11 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { } static void BM_Allocation(int iters) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 524288, 512, 1048576, 10485760, 104857600, @@ -289,7 +333,11 @@ static void BM_Allocation(int iters) { BENCHMARK(BM_Allocation); static void BM_AllocationThreaded(int iters, int num_threads) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1uLL << 33, "GPU_0_bfc"); thread::ThreadPool pool(Env::Default(), "test", num_threads); std::atomic_int_fast32_t count(iters); mutex 
done_lock; @@ -325,7 +373,11 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16); // A more complex benchmark that defers deallocation of an object for // "delay" allocations. static void BM_AllocationDelayed(int iters, int delay) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 4096, 512, 1024, 1024}; int size_index = 0; @@ -363,7 +415,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { // only methods inside this class can access private members of BFCAllocator. void TestBinDebugInfo() { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 << 30, "GPU_0_bfc"); std::vector initial_ptrs; std::vector initial_ptrs_allocated_sizes; @@ -441,7 +497,11 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { } void TestLog2FloorNonZeroSlow() { - GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); + CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUBFCAllocator a(sub_allocator, 1 /* total_memory */, "GPU_0_bfc"); EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0)); EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1)); EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc index 
934a57a5fb..8e14f1ea75 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc @@ -27,7 +27,7 @@ limitations under the License. namespace tensorflow { -GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator, +GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -60,14 +60,6 @@ void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) { #endif // GOOGLE_CUDA } -void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) { - return base_allocator_->AddAllocVisitor(visitor); -} - -void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) { - return base_allocator_->AddFreeVisitor(visitor); -} - bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 856fdc34b4..3d1d0ef481 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -19,7 +19,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -29,20 +29,17 @@ namespace tensorflow { // An allocator that wraps a GPU allocator and adds debugging // functionality that verifies that users do not write outside their // allocated memory. 
-class GPUcudaMallocAllocator : public VisitableAllocator { +class GPUcudaMallocAllocator : public Allocator { public: - explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + explicit GPUcudaMallocAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); ~GPUcudaMallocAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - void AddFreeVisitor(Visitor visitor) override; bool TracksAllocationSizes() override; private: - VisitableAllocator* base_allocator_ = nullptr; // owned + Allocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index e4c834b30d..6bad66dcec 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -73,7 +73,7 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { // ----------------------------------------------------------------------------- // GPUDebugAllocator // ----------------------------------------------------------------------------- -GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator, +GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -111,14 +111,6 @@ void GPUDebugAllocator::DeallocateRaw(void* ptr) { base_allocator_->DeallocateRaw(ptr); } -void GPUDebugAllocator::AddAllocVisitor(Visitor visitor) { - return base_allocator_->AddAllocVisitor(visitor); -} - -void GPUDebugAllocator::AddFreeVisitor(Visitor visitor) { - return base_allocator_->AddFreeVisitor(visitor); -} - bool GPUDebugAllocator::TracksAllocationSizes() { return 
true; } size_t GPUDebugAllocator::RequestedSize(const void* ptr) { @@ -158,7 +150,7 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) { // ----------------------------------------------------------------------------- // GPUNanResetAllocator // ----------------------------------------------------------------------------- -GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator, +GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id) : base_allocator_(allocator) { stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -200,14 +192,6 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) { base_allocator_->DeallocateRaw(ptr); } -void GPUNanResetAllocator::AddAllocVisitor(Visitor visitor) { - return base_allocator_->AddAllocVisitor(visitor); -} - -void GPUNanResetAllocator::AddFreeVisitor(Visitor visitor) { - return base_allocator_->AddFreeVisitor(visitor); -} - size_t GPUNanResetAllocator::RequestedSize(const void* ptr) { return base_allocator_->RequestedSize(ptr); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index 0f9b72040c..0f27ff4384 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -21,7 +21,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" @@ -31,16 +31,13 @@ namespace tensorflow { // An allocator that wraps a GPU allocator and adds debugging // functionality that verifies that users do not write outside their // allocated memory. 
-class GPUDebugAllocator : public VisitableAllocator { +class GPUDebugAllocator : public Allocator { public: - explicit GPUDebugAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + explicit GPUDebugAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); ~GPUDebugAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - void AddFreeVisitor(Visitor visitor) override; bool TracksAllocationSizes() override; size_t RequestedSize(const void* ptr) override; size_t AllocatedSize(const void* ptr) override; @@ -53,7 +50,7 @@ class GPUDebugAllocator : public VisitableAllocator { bool CheckFooter(void* ptr); private: - VisitableAllocator* base_allocator_ = nullptr; // owned + Allocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. @@ -63,23 +60,20 @@ class GPUDebugAllocator : public VisitableAllocator { // An allocator that wraps a GPU allocator and resets the memory on // allocation and free to 'NaN', helping to identify cases where the // user forgets to initialize the memory. 
-class GPUNanResetAllocator : public VisitableAllocator { +class GPUNanResetAllocator : public Allocator { public: - explicit GPUNanResetAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + explicit GPUNanResetAllocator(Allocator* allocator, CudaGpuId cuda_gpu_id); ~GPUNanResetAllocator() override; string Name() override { return "gpu_nan_reset"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; void DeallocateRaw(void* ptr) override; - void AddAllocVisitor(Visitor visitor) override; - void AddFreeVisitor(Visitor visitor) override; size_t RequestedSize(const void* ptr) override; size_t AllocatedSize(const void* ptr) override; void GetStats(AllocatorStats* stats) override; void ClearStats() override; private: - VisitableAllocator* base_allocator_ = nullptr; // owned + Allocator* base_allocator_ = nullptr; // owned se::StreamExecutor* stream_exec_; // Not owned. diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index 236a0afa0b..98283cd846 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -35,7 +35,10 @@ namespace { TEST(GPUDebugAllocatorTest, OverwriteDetection_None) { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -59,7 +62,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) { EXPECT_DEATH( { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + 
GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + cuda_gpu_id, false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -92,7 +98,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { EXPECT_DEATH( { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + cuda_gpu_id, false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -122,7 +131,10 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { TEST(GPUDebugAllocatorTest, ResetToNan) { const CudaGpuId cuda_gpu_id(0); - GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -163,8 +175,11 @@ TEST(GPUDebugAllocatorTest, ResetToNan) { TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { const CudaGpuId cuda_gpu_id(0); // NaN reset must be the outer-most allocator. 
+ GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id), cuda_gpu_id); auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); @@ -205,15 +220,21 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { TEST(GPUDebugAllocatorTest, TracksSizes) { const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); + GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUDebugAllocatorTest, AllocatedVsRequested) { const CudaGpuId cuda_gpu_id(0); + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + false /*use_unified_memory*/, {}, {}); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), + new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""), cuda_gpu_id), cuda_gpu_id); float* t1 = a.Allocate(1); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 2763ac0d4a..50e61b7e00 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -41,7 +41,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/common_runtime/local_device.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" @@ -285,6 +284,38 @@ BaseGPUDevice::~BaseGPUDevice() { for (auto ctx : device_contexts_) ctx->Unref(); } +// This should be idempotent if already initialized. +Status BaseGPUDevice::InitScratchBuffers() { + mutex_lock l(scratch_init_mutex_); + if (scratch_.size() < max_streams_) { + for (int i = 0; i < max_streams_; i++) { + DCHECK(streams_[i]); + if (scratch_.size() > i && scratch_[i]) continue; + size_t scratch_buffer_size = + Eigen::kCudaScratchSize + sizeof(unsigned int); + void* scratch_buffer = gpu_allocator_->AllocateRaw( + Allocator::kAllocatorAlignment, scratch_buffer_size); + if (scratch_buffer == nullptr) { + return errors::FailedPrecondition( + "Failed to allocate scratch buffer for device ", + tf_gpu_id_.value()); + } + se::DeviceMemory mem( + se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); + + bool ok = executor_->SynchronousMemZero( + &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); + if (!ok) { + return errors::FailedPrecondition( + "Failed to memcopy into scratch buffer for device ", + tf_gpu_id_.value()); + } + scratch_.push_back(static_cast(scratch_buffer)); + } + } + return Status::OK(); +} + Status BaseGPUDevice::Init(const SessionOptions& options) { auto executor_status = GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id_); if (!executor_status.status().ok()) { @@ -303,27 +334,6 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { for (int i = 0; i < max_streams_; i++) { streams_.push_back(StreamGroupFactory::Global().GetOrCreate( tf_gpu_id_, i, executor_, options.config.gpu_options())); - - size_t scratch_buffer_size = 
Eigen::kCudaScratchSize + sizeof(unsigned int); - void* scratch_buffer = gpu_allocator_->AllocateRaw( - Allocator::kAllocatorAlignment, scratch_buffer_size); - if (scratch_buffer == nullptr) { - return errors::FailedPrecondition( - "Failed to allocate scratch buffer for device ", tf_gpu_id_.value()); - } - scratch_.push_back(static_cast(scratch_buffer)); - - se::DeviceMemory mem( - se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size)); - - bool ok = executor_->SynchronousMemZero( - &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); - if (!ok) { - return errors::FailedPrecondition( - "Failed to memcopy into scratch buffer for device ", - tf_gpu_id_.value()); - } - device_contexts_.push_back(new GPUDeviceContext( i, streams_.back()->compute, streams_.back()->host_to_device, streams_.back()->device_to_host, streams_.back()->device_to_device)); @@ -867,10 +877,11 @@ PerOpGpuDevice* BaseGPUDevice::MakeGpuDevice() { return new ConcretePerOpGpuDevice(); } -void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, - PerOpGpuDevice* device, - DeviceContext* dc, - Allocator* allocator) { +Status BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, + PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) { + TF_RETURN_IF_ERROR(InitScratchBuffers()); if (dc) { const GPUDeviceContext* gpu_dc = static_cast(dc); const int stream_id = gpu_dc->stream_id(); @@ -881,6 +892,7 @@ void BaseGPUDevice::ReinitializeGpuDevice(OpKernelContext* context, } else { ReinitializeDevice(context, device, 0, allocator); } + return Status::OK(); } Allocator* BaseGPUDevice::GetScopedAllocator(AllocatorAttributes attr, diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 56d03d7a8c..b3eea55758 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -86,8 +86,9 @@ class BaseGPUDevice : public LocalDevice { // The caller owns the returned 
device. PerOpGpuDevice* MakeGpuDevice() override; - void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, Allocator* allocator) override; + Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) override; // Returns the CUDA GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. @@ -125,6 +126,7 @@ class BaseGPUDevice : public LocalDevice { class StreamGroupFactory; gtl::InlinedVector streams_; + mutex scratch_init_mutex_; gtl::InlinedVector scratch_; std::vector device_contexts_; GpuDeviceInfo* gpu_device_info_ = nullptr; @@ -135,6 +137,9 @@ class BaseGPUDevice : public LocalDevice { std::unique_ptr em_; std::unique_ptr thread_pool_; + // Initialize scractch buffers used by Eigen. + Status InitScratchBuffers(); + void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index b18688174d..9ec740fabe 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -76,12 +76,16 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) { // This function is defined for debugging problems with the allocators. GPUProcessState::~GPUProcessState() { CHECK_EQ(this, instance_); - for (auto p : gpu_allocators_) { - delete p; - } instance_ = nullptr; } +int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) { + // Return the NUMA node associated with the GPU's StreamExecutor. 
+ se::StreamExecutor* se = + GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie(); + return se->GetDeviceDescription().numa_node(); +} + Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, TfGpuId tf_gpu_id, size_t total_bytes) { @@ -93,13 +97,10 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, if (tf_gpu_id.value() >= static_cast(gpu_allocators_.size())) { gpu_allocators_.resize(tf_gpu_id.value() + 1); - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) - gpu_al_.resize(tf_gpu_id.value() + 1); } - if (gpu_allocators_[tf_gpu_id.value()] == nullptr) { - VisitableAllocator* gpu_allocator; - + AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()]; + if (allocator_parts.allocator.get() == nullptr) { // Validate allocator types. if (!allocator_type.empty() && allocator_type != "BFC") { LOG(ERROR) << "Invalid allocator type: " << allocator_type; @@ -108,8 +109,17 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, CudaGpuId cuda_gpu_id; TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - gpu_allocator = - new GPUBFCAllocator(cuda_gpu_id, total_bytes, options, + int bus_id = BusIdForGPU(tf_gpu_id); + while (bus_id >= gpu_visitors_.size()) { + gpu_visitors_.push_back({}); + } + GPUMemAllocator* sub_allocator = new GPUMemAllocator( + GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), cuda_gpu_id, + (options.per_process_gpu_memory_fraction() > 1.0 || + options.experimental().use_unified_memory()), + gpu_visitors_[bus_id], {}); + Allocator* gpu_allocator = + new GPUBFCAllocator(sub_allocator, total_bytes, options, strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc")); // If true, checks for memory overwrites by writing @@ -123,34 +133,25 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, // **WARNING** probably will not work in a multi-gpu scenario gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id); } - 
gpu_allocators_[tf_gpu_id.value()] = gpu_allocator; - - // If there are any pending AllocVisitors for this bus, add - // them now. - se::StreamExecutor* se = - GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie(); - int bus_id = se->GetDeviceDescription().numa_node(); - if (bus_id >= 0 && bus_id < static_cast(gpu_visitors_.size())) { - for (const auto& v : gpu_visitors_[bus_id]) { - gpu_allocator->AddAllocVisitor(v); - } - } + + Allocator* recording_allocator = nullptr; if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::GPU; md.dev_index = cuda_gpu_id.value(); md.gpu_registered = false; md.nic_registered = true; - if (static_cast(gpu_al_.size()) <= tf_gpu_id.value()) { - gpu_al_.resize(tf_gpu_id.value() + 1); - } - gpu_al_[tf_gpu_id.value()] = new internal::RecordingAllocator( + recording_allocator = new internal::RecordingAllocator( &process_state_->mem_desc_map_, gpu_allocator, md, &mu_); } + allocator_parts = {std::unique_ptr(gpu_allocator), sub_allocator, + std::unique_ptr(recording_allocator)}; + } + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { + return allocator_parts.recording_allocator.get(); + } else { + return allocator_parts.allocator.get(); } - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) - return gpu_al_[tf_gpu_id.value()]; - return gpu_allocators_[tf_gpu_id.value()]; #else LOG(FATAL) << "GPUAllocator unavailable. 
Not compiled with --config=cuda."; return nullptr; @@ -172,11 +173,12 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { tf_shared_lock lock(mu_); if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types && - static_cast(cuda_al_.size()) > 0) { - return cuda_al_[0]; + !cuda_host_allocators_.empty() && + cuda_host_allocators_[0].recording_allocator != nullptr) { + return cuda_host_allocators_[0].recording_allocator.get(); } if (static_cast(cuda_host_allocators_.size()) > numa_node) { - return cuda_host_allocators_[0]; + return cuda_host_allocators_[0].allocator.get(); } } @@ -190,7 +192,7 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { // it knows is valid. se::StreamExecutor* se = nullptr; for (int i = 0; i < static_cast(gpu_allocators_.size()); ++i) { - if (gpu_allocators_[i] != nullptr) { + if (gpu_allocators_[i].allocator != nullptr) { se = GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie(); break; } @@ -199,6 +201,15 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { CHECK_NE(nullptr, se); while (static_cast(cuda_host_allocators_.size()) <= numa_node) { + while (cuda_host_alloc_visitors_.size() <= numa_node) { + cuda_host_alloc_visitors_.push_back({}); + } + while (cuda_host_free_visitors_.size() <= numa_node) { + cuda_host_free_visitors_.push_back({}); + } + SubAllocator* sub_allocator = new CUDAHostAllocator( + se, numa_node, cuda_host_alloc_visitors_[numa_node], + cuda_host_free_visitors_[numa_node]); // TODO(zheng-xq): evaluate whether 64GB by default is the best choice. 
int64 cuda_host_mem_limit_in_mb = -1; Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB", @@ -208,62 +219,92 @@ Allocator* GPUProcessState::GetCUDAHostAllocator(int numa_node) { LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message(); } int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20); - VisitableAllocator* allocator = - new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit, + Allocator* allocator = + new BFCAllocator(sub_allocator, cuda_host_mem_limit, true /*allow_growth*/, "cuda_host_bfc" /*name*/); - if (LogMemory::IsEnabled()) { + if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) { // Wrap the allocator to track allocation ids for better logging // at the cost of performance. - allocator = new TrackingVisitableAllocator(allocator, true); + allocator = new TrackingAllocator(allocator, true); } - cuda_host_allocators_.push_back(allocator); + cuda_host_allocators_.push_back({std::unique_ptr(allocator), + sub_allocator, + std::unique_ptr(nullptr)}); + AllocatorParts& allocator_parts = cuda_host_allocators_.back(); if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::CPU; md.dev_index = 0; md.gpu_registered = true; md.nic_registered = false; - cuda_al_.push_back(new internal::RecordingAllocator( - &process_state_->mem_desc_map_, cuda_host_allocators_.back(), md, - &mu_)); + allocator_parts.recording_allocator.reset( + new internal::RecordingAllocator(&process_state_->mem_desc_map_, + allocator_parts.allocator.get(), md, + &mu_)); } } - if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) - return cuda_al_[0]; - return cuda_host_allocators_[0]; + if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { + return cuda_host_allocators_[0].recording_allocator.get(); + } else { + return cuda_host_allocators_[0].allocator.get(); + } } void GPUProcessState::AddGPUAllocVisitor(int bus_id, - const 
AllocVisitor& visitor) { - CHECK(process_state_); + const SubAllocator::Visitor& visitor) { #if GOOGLE_CUDA mutex_lock lock(mu_); - for (int i = 0; i < static_cast(gpu_allocators_.size()); ++i) { - se::StreamExecutor* se = - GpuIdUtil::ExecutorForTfGpuId(TfGpuId(i)).ValueOrDie(); - if (gpu_allocators_[i] && - (se->GetDeviceDescription().numa_node() + 1) == bus_id) { - gpu_allocators_[i]->AddAllocVisitor(visitor); - } - } + CHECK(gpu_allocators_.empty()) // Crash OK + << "AddGPUAllocVisitor must be called before " + "first call to GetGPUAllocator."; while (bus_id >= static_cast(gpu_visitors_.size())) { - gpu_visitors_.push_back(std::vector()); + gpu_visitors_.push_back(std::vector()); } gpu_visitors_[bus_id].push_back(visitor); #endif // GOOGLE_CUDA } +void GPUProcessState::AddCUDAHostAllocVisitor( + int numa_node, const SubAllocator::Visitor& visitor) { +#if GOOGLE_CUDA + mutex_lock lock(mu_); + CHECK(cuda_host_allocators_.empty()) // Crash OK + << "AddCUDAHostAllocVisitor must be called before " + "first call to GetCUDAHostAllocator."; + while (numa_node >= static_cast(cuda_host_alloc_visitors_.size())) { + cuda_host_alloc_visitors_.push_back(std::vector()); + } + cuda_host_alloc_visitors_[numa_node].push_back(visitor); +#endif // GOOGLE_CUDA +} + +void GPUProcessState::AddCUDAHostFreeVisitor( + int numa_node, const SubAllocator::Visitor& visitor) { +#if GOOGLE_CUDA + mutex_lock lock(mu_); + CHECK(cuda_host_allocators_.empty()) // Crash OK + << "AddCUDAHostFreeVisitor must be called before " + "first call to GetCUDAHostAllocator."; + while (numa_node >= static_cast(cuda_host_free_visitors_.size())) { + cuda_host_free_visitors_.push_back(std::vector()); + } + cuda_host_free_visitors_[numa_node].push_back(visitor); +#endif // GOOGLE_CUDA +} + void GPUProcessState::TestOnlyReset() { - process_state_->ProcessState::TestOnlyReset(); + if (process_state_) { + process_state_->ProcessState::TestOnlyReset(); + } { mutex_lock lock(mu_); gpu_device_enabled_ = false; + 
gpu_allocators_.clear(); gpu_visitors_.clear(); - gtl::STLDeleteElements(&gpu_allocators_); - gtl::STLDeleteElements(&cuda_host_allocators_); - gtl::STLDeleteElements(&gpu_al_); - gtl::STLDeleteElements(&cuda_al_); + cuda_host_allocators_.clear(); + cuda_host_alloc_visitors_.clear(); + cuda_host_free_visitors_.clear(); } } diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h index cb41c3c6bd..43e9a31660 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h @@ -32,7 +32,6 @@ limitations under the License. namespace tensorflow { class Allocator; -class VisitableAllocator; class PoolAllocator; // Singleton that manages per-process state when GPUs are present. @@ -72,18 +71,30 @@ class GPUProcessState { virtual Allocator* GetCUDAHostAllocator(int numa_node); - // Registers a function to be called once on every new Region - // allocated by every GPURegionAllocator proximate to the specified - // bus. The AllocVisitor is provided with a memory pointer and the - // size of the area it identifies. The pointer is not guaranteed to - // be valid after the call terminates. The intention is for this - // interface to be used for network device memory registration. - // "bus_id" is platform-specific. On many platforms it - // should be 0. On machines with multiple PCIe buses, it should be - // the index of one of the PCIe buses. If the bus_id is invalid, - // results are undefined. - typedef std::function AllocVisitor; - virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor); + // Registers a Visitor to be invoked on new chunks of memory allocated by the + // SubAllocator of every GPU proximate to the specified bus. The AllocVisitor + // is provided with a memory pointer, a GPU id, and the size of the area it + // identifies. The pointer is not guaranteed to be valid after the call + // terminates. 
The intention is for this interface to be used for network + // device memory registration. "bus_id" is platform-specific. On many + // platforms it should be 0. On machines with multiple PCIe buses, it should + // be the index of one of the PCIe buses (maybe the NUMA node at which the + // PCIe is rooted). If the bus_id is invalid, results are undefined. + virtual void AddGPUAllocVisitor(int bus_id, + const SubAllocator::Visitor& visitor); + + // Registers a Visitor to be invoked on new chunks of memory allocated by + // the SubAllocator of the CUDAHostAllocator for the given numa_node. + virtual void AddCUDAHostAllocVisitor(int numa_node, + const SubAllocator::Visitor& visitor); + + // Registers a Visitor to be invoked on each chunk handed back for freeing to + // the SubAllocator of the CUDAHostAllocator for the given numa_node. + virtual void AddCUDAHostFreeVisitor(int numa_node, + const SubAllocator::Visitor& visitor); + + // Returns bus_id for the given GPU id. + virtual int BusIdForGPU(TfGpuId tf_gpu_id); protected: GPUProcessState(); @@ -103,16 +114,21 @@ class GPUProcessState { mutex mu_; - std::vector gpu_allocators_ GUARDED_BY(mu_); - std::vector> gpu_visitors_ GUARDED_BY(mu_); - std::vector cuda_host_allocators_ GUARDED_BY(mu_); + struct AllocatorParts { + std::unique_ptr allocator; + SubAllocator* sub_allocator; // owned by allocator + std::unique_ptr recording_allocator; + }; + std::vector gpu_allocators_ GUARDED_BY(mu_); + std::vector> gpu_visitors_ GUARDED_BY(mu_); - virtual ~GPUProcessState(); + std::vector cuda_host_allocators_ GUARDED_BY(mu_); + std::vector> cuda_host_alloc_visitors_ + GUARDED_BY(mu_); + std::vector> cuda_host_free_visitors_ + GUARDED_BY(mu_); - // Optional RecordingAllocators that wrap the corresponding - // Allocators for runtime attribute use analysis. 
- std::vector gpu_al_ GUARDED_BY(mu_); - std::vector cuda_al_ GUARDED_BY(mu_); + virtual ~GPUProcessState(); friend class GPUDeviceTest; }; diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc index 583bff2c07..6b2f6547b0 100644 --- a/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/pool_allocator_test.cc @@ -31,7 +31,8 @@ TEST(PoolAllocatorTest, ZeroSizeBuffers) { 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/)); @@ -49,7 +50,8 @@ TEST(PoolAllocatorTest, ZeroSizePool) { 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); EXPECT_EQ(0, pool.get_from_pool_count()); @@ -82,7 +84,8 @@ TEST(PoolAllocatorTest, Alignment) { 0 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); for (int i = 0; i < 16; ++i) { size_t alignment = 1 << i; @@ -97,8 +100,8 @@ TEST(PoolAllocatorTest, Alignment) { TEST(PoolAllocatorTest, AutoResize) { PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/, - new BasicCPUAllocator(0 /*numa_node*/), new NoopRounder, - "pool"); + new BasicCPUAllocator(0 /*numa_node*/, {}, {}), + new NoopRounder, "pool"); // Alloc/dealloc 10 sizes just a few times, confirming pool size // stays at 2. 
@@ -123,14 +126,32 @@ TEST(PoolAllocatorTest, AutoResize) { } TEST(PoolAllocatorTest, CudaHostAllocator) { + int alloc_count = 0; + int64 alloc_size = 0; + SubAllocator::Visitor alloc_visitor = + [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) { + ++alloc_count; + alloc_size += size; + }; + int free_count = 0; + int64 free_size = 0; + SubAllocator::Visitor free_visitor = + [&free_count, &free_size](void* ptr, int numa_node, int64 size) { + ++free_count; + free_size += size; + }; se::Platform* platform = se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie(); - PoolAllocator pool( - 2 /*pool_size_limit*/, false /*auto_resize*/, - new CUDAHostAllocator( - platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), - new NoopRounder, "pool"); + CUDAHostAllocator* sub_allocator = new CUDAHostAllocator( + platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) + .ValueOrDie(), + 0 /*numa_node*/, {alloc_visitor}, {free_visitor}); + PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/, + sub_allocator, new NoopRounder, "pool"); + EXPECT_EQ(0, alloc_count); + EXPECT_EQ(0, alloc_size); + EXPECT_EQ(0, free_count); + EXPECT_EQ(0, free_size); // Repeatedly Get a 16-byte value, confirming that there's only // one real allocation. @@ -138,6 +159,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { EXPECT_EQ(0, pool.get_from_pool_count()); EXPECT_EQ(1, pool.allocated_count()); EXPECT_NE(nullptr, p1_16); + EXPECT_EQ(1, alloc_count); // Underlying suballoc of 16 bytes + // Each suballocation includes a 16B ChunkPrefix. + static const int kChunkPrefixSize = 16; + EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size); pool.DeallocateRaw(p1_16); // Pool contents {16} EXPECT_EQ(1, pool.put_count()); @@ -148,6 +173,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { pool.DeallocateRaw(p2_16); // Put it back. 
// Pool contents {16} EXPECT_EQ(2, pool.put_count()); + EXPECT_EQ(1, alloc_count); // Underlying suballoc of 16 bytes + EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(0, free_count); // Get two more values of different sizes. void* p3_4 = pool.AllocateRaw(4, 4); @@ -160,6 +188,9 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { void* p4_2 = pool.AllocateRaw(4, 2); // Get a third size buffer. EXPECT_NE(nullptr, p4_2); EXPECT_EQ(0, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(0, free_count); // The pool is full: when we put back p4_2, the 16-byte buffer // should be evicted since it was least recently inserted. @@ -167,6 +198,10 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { // Pool contents {2, 4} EXPECT_EQ(4, pool.put_count()); EXPECT_EQ(1, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(1, free_count); + EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size); // Re-getting and putting size 2 or 4 should not alter pool size or // num-evicted. 
@@ -180,12 +215,20 @@ TEST(PoolAllocatorTest, CudaHostAllocator) { EXPECT_EQ(6, pool.put_count()); EXPECT_EQ(3, pool.allocated_count()); EXPECT_EQ(1, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(1, free_count); + EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size); pool.Clear(); EXPECT_EQ(0, pool.get_from_pool_count()); EXPECT_EQ(0, pool.put_count()); EXPECT_EQ(0, pool.allocated_count()); EXPECT_EQ(0, pool.evicted_count()); + EXPECT_EQ(3, alloc_count); + EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size); + EXPECT_EQ(3, free_count); + EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size); } TEST(PoolAllocatorTest, Pow2Rounder) { @@ -206,7 +249,8 @@ TEST(PoolAllocatorTest, Name) { 2 /*pool_size_limit*/, false /*auto_resize*/, new CUDAHostAllocator( platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0)) - .ValueOrDie()), + .ValueOrDie(), + 0 /*numa_node*/, {}, {}), new NoopRounder, "pool"); EXPECT_EQ("pool", pool.Name()); } diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index df9c3a686c..538a70668a 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -23,12 +23,11 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/common_runtime/visitable_allocator.h" -#include "tensorflow/core/framework/allocator_registry.h" +#include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" -#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/numa.h" #ifndef INTEL_MKL_DNN_ONLY #include "i_malloc.h" @@ -40,20 +39,16 @@ typedef unsigned int uint; namespace tensorflow { -class MklSubAllocator : public SubAllocator { +class MklSubAllocator : public BasicCPUAllocator { public: + MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {} ~MklSubAllocator() override {} - - void* Alloc(size_t alignment, size_t num_bytes) override { - return port::AlignedMalloc(num_bytes, alignment); - } - void Free(void* ptr, size_t num_bytes) override { port::AlignedFree(ptr); } }; // CPU allocator that handles small-size allocations by calling // suballocator directly. Mostly, it is just a wrapper around a suballocator // (that calls malloc and free directly) with support for bookkeeping. -class MklSmallSizeAllocator : public VisitableAllocator { +class MklSmallSizeAllocator : public Allocator { public: MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory, const string& name) @@ -75,10 +70,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { CHECK(map_.insert(map_val).second); // Increment statistics for small-size allocations. IncrementStats(num_bytes); - // Call alloc visitors. - for (const auto& visitor : alloc_visitors_) { - visitor(ptr, num_bytes); - } } return ptr; } @@ -94,9 +85,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { if (map_iter != map_.end()) { // Call free visitors. 
size_t dealloc_bytes = map_iter->second; - for (const auto& visitor : free_visitors_) { - visitor(ptr, dealloc_bytes); - } sub_allocator_->Free(ptr, dealloc_bytes); DecrementStats(dealloc_bytes); map_.erase(map_iter); @@ -121,16 +109,6 @@ class MklSmallSizeAllocator : public VisitableAllocator { stats_.Clear(); } - void AddAllocVisitor(Visitor visitor) override { - mutex_lock l(mutex_); - alloc_visitors_.push_back(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - mutex_lock l(mutex_); - free_visitors_.push_back(visitor); - } - private: // Increment statistics for the allocator handling small allocations. inline void IncrementStats(size_t alloc_size) @@ -163,15 +141,11 @@ class MklSmallSizeAllocator : public VisitableAllocator { // Allocator stats for small allocs AllocatorStats stats_ GUARDED_BY(mutex_); - - // Visitors - std::vector alloc_visitors_ GUARDED_BY(mutex_); - std::vector free_visitors_ GUARDED_BY(mutex_); }; /// CPU allocator for MKL that wraps BFC allocator and intercepts /// and redirects memory allocation calls from MKL. 
-class MklCPUAllocator : public VisitableAllocator { +class MklCPUAllocator : public Allocator { public: // Constructor and other standard functions @@ -284,16 +258,6 @@ class MklCPUAllocator : public VisitableAllocator { large_size_allocator_->ClearStats(); } - void AddAllocVisitor(Visitor visitor) override { - small_size_allocator_->AddAllocVisitor(visitor); - large_size_allocator_->AddAllocVisitor(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - small_size_allocator_->AddFreeVisitor(visitor); - large_size_allocator_->AddFreeVisitor(visitor); - } - private: // Hooks provided by this allocator for memory allocation routines from MKL @@ -330,7 +294,7 @@ class MklCPUAllocator : public VisitableAllocator { // The alignment that we need for the allocations static constexpr const size_t kAlignment = 64; - VisitableAllocator* large_size_allocator_; // owned by this class + Allocator* large_size_allocator_; // owned by this class MklSmallSizeAllocator* small_size_allocator_; // owned by this class. 
SubAllocator* sub_allocator_; // not owned by this class diff --git a/tensorflow/core/common_runtime/pool_allocator.cc b/tensorflow/core/common_runtime/pool_allocator.cc index fdad8de8d6..66dc8f3322 100644 --- a/tensorflow/core/common_runtime/pool_allocator.cc +++ b/tensorflow/core/common_runtime/pool_allocator.cc @@ -40,8 +40,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize, auto_resize_(auto_resize), pool_size_limit_(pool_size_limit), allocator_(allocator), - size_rounder_(size_rounder), - allocation_begun_(false) { + size_rounder_(size_rounder) { if (auto_resize) { CHECK_LT(size_t{0}, pool_size_limit) << "size limit must be > 0 if auto_resize is true."; @@ -93,7 +92,6 @@ ChunkPrefix* FindPrefix(void* user_ptr) { } // namespace void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { - if (!allocation_begun_) allocation_begun_ = true; if (num_bytes == 0) return nullptr; // If alignment is larger than kPoolAlignment, increase num_bytes so that we @@ -129,9 +127,6 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { return PrepareChunk(r, alignment, num_bytes); } else { void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes); - for (const auto& v : alloc_visitors_) { - v(ptr, num_bytes); - } return PrepareChunk(ptr, alignment, num_bytes); } } @@ -141,9 +136,6 @@ void PoolAllocator::DeallocateRaw(void* ptr) { ChunkPrefix* cp = FindPrefix(ptr); CHECK_LE((void*)cp, (void*)ptr); if (!has_size_limit_ && !auto_resize_) { - for (const auto& v : free_visitors_) { - v(cp, cp->num_bytes); - } allocator_->Free(cp, cp->num_bytes); } else { mutex_lock lock(mutex_); @@ -164,9 +156,6 @@ void PoolAllocator::Clear() { mutex_lock lock(mutex_); for (auto iter : pool_) { PtrRecord* pr = iter.second; - for (const auto& v : free_visitors_) { - v(pr->ptr, pr->num_bytes); - } allocator_->Free(pr->ptr, pr->num_bytes); delete pr; } @@ -221,9 +210,6 @@ void PoolAllocator::EvictOne() { DCHECK(iter != pool_.end()); } 
pool_.erase(iter); - for (const auto& v : free_visitors_) { - v(prec->ptr, prec->num_bytes); - } allocator_->Free(prec->ptr, prec->num_bytes); delete prec; ++evicted_count_; @@ -269,28 +255,19 @@ void PoolAllocator::EvictOne() { } } -void PoolAllocator::AddAllocVisitor(Visitor visitor) { - mutex_lock lock(mutex_); - CHECK(!allocation_begun_) - << "AddAllocVisitor may not be called after pool allocation " - << "has begun."; - alloc_visitors_.push_back(visitor); -} - -void PoolAllocator::AddFreeVisitor(Visitor visitor) { - mutex_lock lock(mutex_); - CHECK(!allocation_begun_) - << "AddFreeVisitor may not be called after pool allocation " - << "has begun."; - free_visitors_.push_back(visitor); -} - void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) { - return port::AlignedMalloc(num_bytes, static_cast(alignment)); + void* ptr = nullptr; + if (num_bytes > 0) { + ptr = port::AlignedMalloc(num_bytes, static_cast(alignment)); + VisitAlloc(ptr, numa_node_, num_bytes); + } + return ptr; } void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) { - port::AlignedFree(ptr); + if (num_bytes > 0) { + VisitFree(ptr, numa_node_, num_bytes); + port::AlignedFree(ptr); + } } - } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/pool_allocator.h b/tensorflow/core/common_runtime/pool_allocator.h index 607734445b..5b4623ba10 100644 --- a/tensorflow/core/common_runtime/pool_allocator.h +++ b/tensorflow/core/common_runtime/pool_allocator.h @@ -16,14 +16,13 @@ limitations under the License. #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ -// Simple LRU pool allocators for various flavors of CPU RAM that -// implement the VisitableAllocator interface. +// Simple LRU pool allocators for various flavors of CPU RAM. 
#include #include #include #include -#include "tensorflow/core/common_runtime/visitable_allocator.h" +#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -41,7 +40,7 @@ class RoundUpInterface { // Size-limited pool of memory buffers obtained from a SubAllocator // instance. Pool eviction policy is LRU. -class PoolAllocator : public VisitableAllocator { +class PoolAllocator : public Allocator { public: // "pool_size_limit" is the maximum number of returned, re-usable // memory buffers to keep in the pool. If pool_size_limit == 0, the @@ -64,14 +63,6 @@ class PoolAllocator : public VisitableAllocator { void DeallocateRaw(void* ptr) override; - // REQUIRES: The following functions may only be called prior - // to the first Allocate*() call. Once allocation has begun, it is - // illegal to register another visitor. - - void AddAllocVisitor(Visitor visitor) override; - - void AddFreeVisitor(Visitor visitor) override; - // Allocate an unused memory region of size "num_bytes". Fetch from // the pool if available, otherwise call allocator_. void* Get(size_t num_bytes); @@ -141,12 +132,6 @@ class PoolAllocator : public VisitableAllocator { int64 put_count_ GUARDED_BY(mutex_) = 0; int64 allocated_count_ GUARDED_BY(mutex_) = 0; int64 evicted_count_ GUARDED_BY(mutex_) = 0; - // Write access to these is guarded by mutex_, but not read - // access. They may only be modified prior to the first - // allocation. Later attempts to modify will fail. - std::vector alloc_visitors_; - std::vector free_visitors_; - std::atomic allocation_begun_; }; // Do-nothing rounder. Passes through sizes unchanged. @@ -166,7 +151,9 @@ class Pow2Rounder : public RoundUpInterface { class BasicCPUAllocator : public SubAllocator { public: // Argument numa_node is currently ignored. 
- explicit BasicCPUAllocator(int numa_node) : numa_node_(numa_node) {} + BasicCPUAllocator(int numa_node, const std::vector& alloc_visitors, + const std::vector& free_visitors) + : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {} ~BasicCPUAllocator() override {} @@ -176,6 +163,8 @@ class BasicCPUAllocator : public SubAllocator { private: int numa_node_; + + TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator); }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc index 447338e7bd..bcaa37fc8a 100644 --- a/tensorflow/core/common_runtime/process_state.cc +++ b/tensorflow/core/common_runtime/process_state.cc @@ -71,20 +71,28 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) { return MemDesc(); } -VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) { +Allocator* ProcessState::GetCPUAllocator(int numa_node) { CHECK_GE(numa_node, 0); if (!numa_enabled_) numa_node = 0; mutex_lock lock(mu_); while (cpu_allocators_.size() <= static_cast(numa_node)) { + // If visitors have been defined we need an Allocator built from + // a SubAllocator. Prefer BFCAllocator, but fall back to PoolAllocator + // depending on env var setting. + const bool alloc_visitors_defined = + (!cpu_alloc_visitors_.empty() || !cpu_free_visitors_.empty()); bool use_bfc_allocator = false; - // TODO(reedwm): Switch default to BGFAllocator if it's at least as fast and - // efficient. - Status status = ReadBoolFromEnvVar("TF_CPU_ALLOCATOR_USE_BFC", false, - &use_bfc_allocator); + Status status = ReadBoolFromEnvVar( + "TF_CPU_ALLOCATOR_USE_BFC", alloc_visitors_defined, &use_bfc_allocator); if (!status.ok()) { LOG(ERROR) << "GetCPUAllocator: " << status.error_message(); } - VisitableAllocator* allocator; + Allocator* allocator = nullptr; + SubAllocator* sub_allocator = + (alloc_visitors_defined || use_bfc_allocator) + ? new BasicCPUAllocator(numa_enabled_ ? 
numa_node : -1, + cpu_alloc_visitors_, cpu_free_visitors_) + : nullptr; if (use_bfc_allocator) { // TODO(reedwm): evaluate whether 64GB by default is the best choice. int64 cpu_mem_limit_in_mb = -1; @@ -95,34 +103,63 @@ VisitableAllocator* ProcessState::GetCPUAllocator(int numa_node) { LOG(ERROR) << "GetCPUAllocator: " << status.error_message(); } int64 cpu_mem_limit = cpu_mem_limit_in_mb * (1LL << 20); - allocator = new BFCAllocator( - new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), cpu_mem_limit, - true /*allow_growth*/, "bfc_cpu_allocator_for_gpu" /*name*/); + DCHECK(sub_allocator); + allocator = + new BFCAllocator(sub_allocator, cpu_mem_limit, true /*allow_growth*/, + "bfc_cpu_allocator_for_gpu" /*name*/); VLOG(2) << "Using BFCAllocator with memory limit of " << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator"; - } else { - allocator = new PoolAllocator( - 100 /*pool_size_limit*/, true /*auto_resize*/, - new BasicCPUAllocator(numa_enabled_ ? numa_node : -1), - new NoopRounder, "cpu_pool"); + } else if (alloc_visitors_defined) { + DCHECK(sub_allocator); + allocator = + new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/, + sub_allocator, new NoopRounder, "cpu_pool"); VLOG(2) << "Using PoolAllocator for ProcessState CPU allocator " << "numa_enabled_=" << numa_enabled_ << " numa_node=" << numa_node; + } else { + DCHECK(!sub_allocator); + allocator = cpu_allocator(); } - if (LogMemory::IsEnabled()) { + if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) { // Wrap the allocator to track allocation ids for better logging // at the cost of performance. 
- allocator = new TrackingVisitableAllocator(allocator, true); + allocator = new TrackingAllocator(allocator, true); } cpu_allocators_.push_back(allocator); + if (!sub_allocator) { + DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty()); + } } return cpu_allocators_[numa_node]; } +void ProcessState::AddCPUAllocVisitor(SubAllocator::Visitor visitor) { + VLOG(1) << "AddCPUAllocVisitor"; + mutex_lock lock(mu_); + CHECK_EQ(0, cpu_allocators_.size()) // Crash OK + << "AddCPUAllocVisitor must be called prior to first call to " + "ProcessState::GetCPUAllocator"; + cpu_alloc_visitors_.push_back(std::move(visitor)); +} + +void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) { + mutex_lock lock(mu_); + CHECK_EQ(0, cpu_allocators_.size()) // Crash OK + << "AddCPUFreeVisitor must be called prior to first call to " + "ProcessState::GetCPUAllocator"; + cpu_free_visitors_.push_back(std::move(visitor)); +} + void ProcessState::TestOnlyReset() { mutex_lock lock(mu_); + // Don't delete this value because it's static. + Allocator* default_cpu_allocator = cpu_allocator(); mem_desc_map_.clear(); - gtl::STLDeleteElements(&cpu_allocators_); + for (Allocator* a : cpu_allocators_) { + if (a != default_cpu_allocator) delete a; + } + cpu_allocators_.clear(); gtl::STLDeleteElements(&cpu_al_); } diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h index 2892677333..cac312d849 100644 --- a/tensorflow/core/common_runtime/process_state.h +++ b/tensorflow/core/common_runtime/process_state.h @@ -30,7 +30,6 @@ limitations under the License. namespace tensorflow { class Allocator; -class VisitableAllocator; class PoolAllocator; // Singleton that manages per-process state, e.g. allocation of @@ -65,7 +64,15 @@ class ProcessState { // Returns the one CPUAllocator used for the given numa_node. // TEMPORARY: ignores numa_node. 
- VisitableAllocator* GetCPUAllocator(int numa_node); + Allocator* GetCPUAllocator(int numa_node); + + // Registers alloc visitor for the CPU allocator(s). + // REQUIRES: must be called before GetCPUAllocator. + void AddCPUAllocVisitor(SubAllocator::Visitor v); + + // Registers free visitor for the CPU allocator(s). + // REQUIRES: must be called before GetCPUAllocator. + void AddCPUFreeVisitor(SubAllocator::Visitor v); typedef std::unordered_map MDMap; @@ -87,7 +94,9 @@ class ProcessState { mutex mu_; - std::vector cpu_allocators_ GUARDED_BY(mu_); + std::vector cpu_allocators_ GUARDED_BY(mu_); + std::vector cpu_alloc_visitors_ GUARDED_BY(mu_); + std::vector cpu_free_visitors_ GUARDED_BY(mu_); virtual ~ProcessState(); diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h index 103eee03b3..9d59264899 100644 --- a/tensorflow/core/common_runtime/renamed_device.h +++ b/tensorflow/core/common_runtime/renamed_device.h @@ -72,9 +72,10 @@ class RenamedDevice : public Device { return underlying_->MakeGpuDevice(); } - void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, Allocator* allocator) override { - underlying_->ReinitializeGpuDevice(context, device, dc, allocator); + Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) override { + return underlying_->ReinitializeGpuDevice(context, device, dc, allocator); } Status MakeTensorFromProto(const TensorProto& tensor_proto, diff --git a/tensorflow/core/common_runtime/visitable_allocator.h b/tensorflow/core/common_runtime/visitable_allocator.h deleted file mode 100644 index ae0563a96a..0000000000 --- a/tensorflow/core/common_runtime/visitable_allocator.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ -#define TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ - -#include -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/tracking_allocator.h" - -namespace tensorflow { - -// Subclass VisitableAllocator instead of Allocator when a memory -// allocator needs to enable some kind of registration/deregistration -// of memory areas. -class VisitableAllocator : public Allocator { - public: - // Visitor gets called with a pointer to a memory area and its - // size in bytes. - typedef std::function Visitor; - - // Register a visitor guaranteed to be called exactly once on each - // chunk of memory newly allocated from the underlying device. - // Typically, chunks will be reused and possibly sub-divided by a - // pool manager, so the calls will happen only once per process - // execution, not once per tensor (re)allocation. - virtual void AddAllocVisitor(Visitor visitor) = 0; - - // Register a visitor guaranteed to be called on each chunk of - // memory returned to the underlying device. - virtual void AddFreeVisitor(Visitor visitor) = 0; -}; - -// Needed for cases when a VisitableAllocator gets wrapped for tracking. 
-// Multiple-inheritance is considered acceptable in this case because -// VisitableAllocator is a pure virtual interface and only TrackingAllocator -// has default implementation. -class TrackingVisitableAllocator : public TrackingAllocator, - public VisitableAllocator { - public: - TrackingVisitableAllocator(VisitableAllocator* allocator, bool track_ids) - : TrackingAllocator(allocator, track_ids), allocator_(allocator) {} - ~TrackingVisitableAllocator() override {} - - string Name() override { return TrackingAllocator::Name(); } - - void* AllocateRaw(size_t alignment, size_t num_bytes) override { - return TrackingAllocator::AllocateRaw(alignment, num_bytes); - } - - void DeallocateRaw(void* ptr) override { - TrackingAllocator::DeallocateRaw(ptr); - } - - void AddAllocVisitor(Visitor visitor) override { - allocator_->AddAllocVisitor(visitor); - } - - void AddFreeVisitor(Visitor visitor) override { - allocator_->AddFreeVisitor(visitor); - } - - protected: - VisitableAllocator* allocator_; -}; -} // namespace tensorflow -#endif // TENSORFLOW_CORE_COMMON_RUNTIME_VISITABLE_ALLOCATOR_H_ diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index 2a7ee16a16..84cee5569c 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -196,7 +196,7 @@ class CPUAllocatorFactory : public AllocatorFactory { class CPUSubAllocator : public SubAllocator { public: explicit CPUSubAllocator(CPUAllocator* cpu_allocator) - : cpu_allocator_(cpu_allocator) {} + : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {} void* Alloc(size_t alignment, size_t num_bytes) override { return cpu_allocator_->AllocateRaw(alignment, num_bytes); @@ -222,4 +222,22 @@ Allocator* cpu_allocator() { } return cpu_alloc; } + +SubAllocator::SubAllocator(const std::vector& alloc_visitors, + const std::vector& free_visitors) + : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {} + +void SubAllocator::VisitAlloc(void* 
ptr, int index, size_t num_bytes) { + for (const auto& v : alloc_visitors_) { + v(ptr, index, num_bytes); + } +} + +void SubAllocator::VisitFree(void* ptr, int index, size_t num_bytes) { + // Although we don't guarantee any order of visitor application, strive + // to apply free visitors in reverse order of alloc visitors. + for (int i = free_visitors_.size() - 1; i >= 0; --i) { + free_visitors_[i](ptr, index, num_bytes); + } +} } // namespace tensorflow diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index ded120b704..8c23604625 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -387,13 +388,36 @@ void EnableCPUAllocatorStats(bool enable); // full statistics. By default, it's disabled. void EnableCPUAllocatorFullStats(bool enable); -// Abstract interface of an object that does the underlying suballoc/free of -// memory for a higher-level allocator. +// An object that does the underlying suballoc/free of memory for a higher-level +// allocator. The expectation is that the higher-level allocator is doing some +// kind of cache or pool management so that it will call SubAllocator::Alloc and +// Free relatively infrequently, compared to the number of times its own +// AllocateRaw and Free methods are called. class SubAllocator { public: + // Visitor gets called with a pointer to a memory area and its + // size in bytes. The index value will be numa_node for a CPU + // allocator and GPU id for a GPU allocator. 
+ typedef std::function Visitor; + + SubAllocator(const std::vector& alloc_visitors, + const std::vector& free_visitors); + virtual ~SubAllocator() {} virtual void* Alloc(size_t alignment, size_t num_bytes) = 0; virtual void Free(void* ptr, size_t num_bytes) = 0; + + protected: + // Implementation of Alloc() method must call this on newly allocated + // value. + void VisitAlloc(void* ptr, int index, size_t num_bytes); + + // Implementation of Free() method must call this on value to be + // freed immediately before deallocation. + void VisitFree(void* ptr, int index, size_t num_bytes); + + const std::vector alloc_visitors_; + const std::vector free_visitors_; }; } // namespace tensorflow diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 794250a2c1..53ac639b4c 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -214,10 +214,12 @@ class DeviceBase { // This is overridden by GPU devices to reinitialize the derived // type returned by MakeGpuDevice. 
- virtual void ReinitializeGpuDevice(OpKernelContext* /*context*/, - PerOpGpuDevice* /*device*/, - DeviceContext* /*dc*/, - Allocator* /*allocator*/) {} + virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/, + PerOpGpuDevice* /*device*/, + DeviceContext* /*dc*/, + Allocator* /*allocator*/) { + return Status::OK(); + } // Unimplemented by default virtual const DeviceAttributes& attributes() const; diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 80f2b12987..3e34bf0418 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -265,9 +265,12 @@ OpKernelContext::OpKernelContext(Params* params, int num_outputs) params_->ensure_eigen_gpu_device(); if (params_->eigen_gpu_device != nullptr) { Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes()); - params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device, - params_->op_device_context, - eigen_gpu_allocator); + Status s = params_->device->ReinitializeGpuDevice( + this, params_->eigen_gpu_device, params_->op_device_context, + eigen_gpu_allocator); + if (!s.ok()) { + SetStatus(s); + } } if (params_->record_tensor_accesses) { referenced_tensors_.Init(); -- GitLab From 964a32573bffbb798d0eb97ec9b37da0657c4dbd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 18 Sep 2018 14:41:37 -0700 Subject: [PATCH 0338/1357] Clean up remove_negation pass in Grappler. 
PiperOrigin-RevId: 213520177 --- .../optimizers/arithmetic_optimizer.cc | 42 +++++++------------ .../optimizers/arithmetic_optimizer_test.cc | 42 +++++++++++-------- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 11ce121cba..992e85d2c6 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -1325,38 +1325,26 @@ class RemoveNegationStage : public ArithmeticOptimizerStage { } Status TrySimplify(NodeDef* node, string* simplified_node_name) override { - const string node_name = node->name(); NodeDef* x; NodeDef* y; TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &x)); TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &y)); bool updated = false; - if (IsAdd(*node)) { - if (IsNeg(*x)) { - // (-a) + b = b - a - node->set_op("Sub"); - node->mutable_input()->SwapElements(0, 1); - node->set_input(1, x->input(0)); - node->add_input(AsControlDependency(x->name())); - ctx().node_map->AddOutput(NodeName(x->input(0)), node_name); - updated = true; - } else if (IsNeg(*y)) { - // a + (-b) = a - b - node->set_op("Sub"); - node->set_input(1, y->input(0)); - node->add_input(AsControlDependency(y->name())); - ctx().node_map->AddOutput(NodeName(y->input(0)), node_name); - updated = true; - } - } else if (IsSub(*node)) { - if (IsNeg(*y)) { - // a - (-b) = a + b - node->set_op("Add"); - node->set_input(1, y->input(0)); - node->add_input(AsControlDependency(y->name())); - ctx().node_map->AddOutput(NodeName(y->input(0)), node_name); - updated = true; - } + if (IsNeg(*y)) { + // a - (-b) = a + b or a + (-b) = a - b + ForwardControlDependencies(node, {y}); + ctx().node_map->UpdateInput(node->name(), node->input(1), y->input(0)); + node->set_op(IsAdd(*node) ? 
"Sub" : "Add"); + node->set_input(1, y->input(0)); + updated = true; + } else if (IsAdd(*node) && IsNeg(*x)) { + // (-a) + b = b - a + ForwardControlDependencies(node, {x}); + ctx().node_map->UpdateInput(node->name(), node->input(0), x->input(0)); + node->set_op("Sub"); + node->mutable_input()->SwapElements(0, 1); + node->set_input(1, x->input(0)); + updated = true; } if (updated) { AddToOptimizationQueue(node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index bc838c6659..88839d944c 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -2353,9 +2353,14 @@ TEST_F(ArithmeticOptimizerTest, RemoveNegation) { Output sub_negx_y = ops::Sub(s.WithOpName("Sub_negx_y"), neg_x, y); Output sub_x_negy = ops::Sub(s.WithOpName("Sub_x_negy"), x, neg_y); Output sub_negx_negy = ops::Sub(s.WithOpName("Sub_negx_negy"), neg_x, neg_y); - auto add_all = ops::AddN(s.WithOpName("add_all"), - {add_x_y, add_negx_y, add_x_negy, add_negx_negy, - sub_x_y, sub_negx_y, sub_x_negy, sub_negx_negy}); + Output neg_x_with_dep = ops::Neg( + s.WithOpName("Neg_x_with_dep").WithControlDependencies({add_x_y}), x); + Output add_negx_with_dep_y = + ops::Add(s.WithOpName("Add_negx_with_dep_y"), neg_x_with_dep, y); + auto add_all = + ops::AddN(s.WithOpName("add_all"), + {add_x_y, add_negx_y, add_x_negy, add_negx_negy, sub_x_y, + sub_negx_y, sub_x_negy, sub_negx_negy, add_negx_with_dep_y}); GrapplerItem item; item.fetch = {"add_all"}; @@ -2370,7 +2375,7 @@ TEST_F(ArithmeticOptimizerTest, RemoveNegation) { GraphDef output; ArithmeticOptimizer optimizer; EnableOnlyRemoveNegation(&optimizer); - OptimizeAndPrune(&optimizer, &item, &output); + OptimizeTwice(&optimizer, &item, &output); EXPECT_EQ(item.graph.node_size(), output.node_size()); int found = 0; @@ -2379,42 +2384,43 @@ TEST_F(ArithmeticOptimizerTest, 
RemoveNegation) { if (node.name() == "Add_negx_y") { ++found; EXPECT_EQ("Sub", node.op()); - EXPECT_EQ(3, node.input_size()); + EXPECT_EQ(2, node.input_size()); EXPECT_EQ("y", node.input(0)); EXPECT_EQ("x", node.input(1)); - EXPECT_EQ("^Neg_x", node.input(2)); } else if (node.name() == "Add_x_negy") { ++found; EXPECT_EQ("Sub", node.op()); - EXPECT_EQ(3, node.input_size()); + EXPECT_EQ(2, node.input_size()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("^Neg_y", node.input(2)); } else if (node.name() == "Add_negx_negy") { ++found; EXPECT_EQ("Sub", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("Neg_y", node.input(0)); - EXPECT_EQ("x", node.input(1)); - EXPECT_EQ("^Neg_x", node.input(2)); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("Neg_x", node.input(0)); + EXPECT_EQ("y", node.input(1)); } else if (node.name() == "Sub_x_negy") { ++found; EXPECT_EQ("Add", node.op()); - EXPECT_EQ(3, node.input_size()); + EXPECT_EQ(2, node.input_size()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("^Neg_y", node.input(2)); } else if (node.name() == "Sub_negx_negy") { ++found; EXPECT_EQ("Sub", node.op()); - EXPECT_EQ(4, node.input_size()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("x", node.input(1)); + } else if (node.name() == "Add_negx_with_dep_y") { + ++found; + EXPECT_EQ("Sub", node.op()); + EXPECT_EQ(3, node.input_size()); EXPECT_EQ("y", node.input(0)); EXPECT_EQ("x", node.input(1)); - EXPECT_EQ("^Neg_y", node.input(2)); - EXPECT_EQ("^Neg_x", node.input(3)); + EXPECT_EQ("^Add_x_y", node.input(2)); } } - EXPECT_EQ(5, found); + EXPECT_EQ(6, found); auto tensors = EvaluateNodes(output, item.fetch, feed); EXPECT_EQ(1, tensors.size()); -- GitLab From c2d392e36a3f68c2e1b8fdfa280953efc6426c52 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Tue, 18 Sep 2018 15:14:24 -0700 Subject: [PATCH 0339/1357] Add error reporting TFLIte C API PiperOrigin-RevId: 213526489 --- 
.../contrib/lite/experimental/c/c_api.cc | 46 ++++++++++++++++++- .../contrib/lite/experimental/c/c_api.h | 12 +++++ .../lite/experimental/c/c_api_internal.h | 14 ++++++ .../contrib/lite/experimental/c/c_api_test.cc | 31 +++++++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/experimental/c/c_api.cc b/tensorflow/contrib/lite/experimental/c/c_api.cc index 1c3996fb87..9c29f9d8b9 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" #include "tensorflow/contrib/lite/experimental/c/c_api_internal.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" @@ -26,6 +27,26 @@ limitations under the License. extern "C" { #endif // __cplusplus +namespace { +class CallbackErrorReporter : public tflite::ErrorReporter { + public: + using ErrorCallback = void (*)(void* user_data, const char* format, + va_list args); + + CallbackErrorReporter(ErrorCallback callback, void* user_data) + : callback_(callback), user_data_(user_data) {} + + int Report(const char* format, va_list args) override { + callback_(user_data_, format, args); + return 0; + } + + private: + ErrorCallback callback_; + void* user_data_; +}; +} // namespace + // LINT.IfChange TFL_Model* TFL_NewModel(const void* model_data, size_t model_size) { @@ -56,18 +77,38 @@ void TFL_InterpreterOptionsSetNumThreads(TFL_InterpreterOptions* options, options->num_threads = num_threads; } +TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetErrorReporter( + TFL_InterpreterOptions* options, + void (*reporter)(void* user_data, const char* format, va_list args), + void* user_data) { + options->error_reporter = reporter; + options->error_reporter_user_data = user_data; +} + TFL_Interpreter* TFL_NewInterpreter( const TFL_Model* 
model, const TFL_InterpreterOptions* optional_options) { if (!model || !model->impl) { return nullptr; } + std::unique_ptr optional_error_reporter; + if (optional_options && optional_options->error_reporter != nullptr) { + optional_error_reporter.reset( + new CallbackErrorReporter(optional_options->error_reporter, + optional_options->error_reporter_user_data)); + } + // TODO(b/111881878): Allow use of C API without pulling in all builtin ops. tflite::ops::builtin::BuiltinOpResolver resolver; if (optional_options) { resolver.AddAll(optional_options->op_resolver); } - tflite::InterpreterBuilder builder(*model->impl, resolver); + tflite::ErrorReporter* error_reporter = optional_error_reporter + ? optional_error_reporter.get() + : tflite::DefaultErrorReporter(); + tflite::InterpreterBuilder builder(model->impl->GetModel(), resolver, + error_reporter); + std::unique_ptr interpreter; if (builder(&interpreter) != kTfLiteOk) { return nullptr; @@ -80,7 +121,8 @@ TFL_Interpreter* TFL_NewInterpreter( } } - return new TFL_Interpreter{model->impl, std::move(interpreter)}; + return new TFL_Interpreter{model->impl, std::move(optional_error_reporter), + std::move(interpreter)}; } void TFL_DeleteInterpreter(TFL_Interpreter* interpreter) { delete interpreter; } diff --git a/tensorflow/contrib/lite/experimental/c/c_api.h b/tensorflow/contrib/lite/experimental/c/c_api.h index 44b936aa87..f52ab8f9ed 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api.h +++ b/tensorflow/contrib/lite/experimental/c/c_api.h @@ -15,6 +15,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_H_ #define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_C_C_API_H_ +#include #include // Eventually the various C APIs defined in context.h will be migrated into @@ -86,6 +87,17 @@ TFL_CAPI_EXPORT extern void TFL_DeleteInterpreterOptions( TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetNumThreads( TFL_InterpreterOptions* options, int32_t num_threads); +// Sets a custom error reporter for interpreter execution. +// +// * `reporter` takes the provided `user_data` object, as well as a C-style +// format string and arg list (see also vprintf). +// * `user_data` is optional. If provided, it is owned by the client and must +// remain valid for the duration of the interpreter lifetime. +TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetErrorReporter( + TFL_InterpreterOptions* options, + void (*reporter)(void* user_data, const char* format, va_list args), + void* user_data); + // -------------------------------------------------------------------------- // TFL_Interpreter provides inference from a provided model. typedef struct TFL_Interpreter TFL_Interpreter; diff --git a/tensorflow/contrib/lite/experimental/c/c_api_internal.h b/tensorflow/contrib/lite/experimental/c/c_api_internal.h index af675ac98a..da3af3cad4 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_internal.h +++ b/tensorflow/contrib/lite/experimental/c/c_api_internal.h @@ -23,6 +23,9 @@ limitations under the License. // Internal structures used by the C API. These are likely to change and should // not be depended on. +// +// NOTE: This header does not follow C conventions and does not define a C API. +// It is effectively an (internal) implementation detail of the C API. struct TFL_Model { // Sharing is safe as FlatBufferModel is const. 
@@ -34,13 +37,24 @@ struct TFL_InterpreterOptions { kDefaultNumThreads = -1, }; int num_threads = kDefaultNumThreads; + tflite::MutableOpResolver op_resolver; + + void (*error_reporter)(void* user_data, const char* format, + va_list args) = nullptr; + void* error_reporter_user_data = nullptr; }; struct TFL_Interpreter { // Taking a reference to the (const) model data avoids lifetime-related issues // and complexity with the TFL_Model's existence. std::shared_ptr model; + + // The interpreter does not take ownership of the provided ErrorReporter + // instance, so we ensure its validity here. Note that the interpreter may use + // the reporter in its destructor, so it should be declared first. + std::unique_ptr optional_error_reporter; + std::unique_ptr impl; }; diff --git a/tensorflow/contrib/lite/experimental/c/c_api_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_test.cc index 649dac8d1a..48a3714ec3 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_test.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_test.cc @@ -85,6 +85,37 @@ TEST(CApiSimple, Smoke) { TFL_DeleteInterpreter(interpreter); } +TEST(CApiSimple, ErrorReporter) { + TFL_Model* model = TFL_NewModelFromFile( + "tensorflow/contrib/lite/testdata/add.bin"); + TFL_InterpreterOptions* options = TFL_NewInterpreterOptions(); + + // Install a custom error reporter into the interpreter by way of options. + tflite::TestErrorReporter reporter; + TFL_InterpreterOptionsSetErrorReporter( + options, + [](void* user_data, const char* format, va_list args) { + reinterpret_cast(user_data)->Report(format, + args); + }, + &reporter); + TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options); + + // The options/model can be deleted immediately after interpreter creation. + TFL_DeleteInterpreterOptions(options); + TFL_DeleteModel(model); + + // Invoke the interpreter before tensor allocation. 
+ EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteError); + + // The error should propagate to the custom error reporter. + EXPECT_EQ(reporter.error_messages(), + "Invoke called on model that is not ready."); + EXPECT_EQ(reporter.num_calls(), 1); + + TFL_DeleteInterpreter(interpreter); +} + } // namespace int main(int argc, char** argv) { -- GitLab From 228572ecf387931b14e92555a2234dc085813e21 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Tue, 18 Sep 2018 15:24:59 -0700 Subject: [PATCH 0340/1357] [TF:XLA] Document that the order of control predecessors matters. PiperOrigin-RevId: 213528296 --- tensorflow/compiler/xla/service/hlo_instruction.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 4f6cac1396..1ef8cd5036 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1616,6 +1616,10 @@ class HloInstruction { InstructionVector operands_; // The set of control predecessors of this instruction. + // Note that the order of the instructions in the vector influences the order + // computed in HloComputation::ComputeInstructionPostOrder, which may + // influence the result of the compilation by changing the scheduling. We are + // not sure if it matters. std::vector control_predecessors_; // The users of this instruction. 
Users are HLOs where this instruction is an -- GitLab From 0bd8f45ed9ee929225e8be93e7b998085fd2ba74 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 18 Sep 2018 15:27:20 -0700 Subject: [PATCH 0341/1357] Automated rollback of commit b1ff7c2cedcc7d49d430d56655870e6d68a0c8f7 PiperOrigin-RevId: 213528716 --- .../core/kernels/data/captured_function.cc | 206 ++++++++++-------- .../core/kernels/data/captured_function.h | 129 +++++------ tensorflow/core/kernels/data/dataset_utils.cc | 9 +- tensorflow/core/kernels/data/dataset_utils.h | 5 +- .../core/kernels/data/filter_dataset_op.cc | 33 ++- .../core/kernels/data/flat_map_dataset_op.cc | 7 +- .../core/kernels/data/generator_dataset_op.cc | 23 +- .../data/group_by_reducer_dataset_op.cc | 31 ++- .../data/group_by_window_dataset_op.cc | 25 +-- .../kernels/data/interleave_dataset_op.cc | 8 +- .../kernels/data/map_and_batch_dataset_op.cc | 6 +- .../core/kernels/data/map_dataset_op.cc | 6 +- .../data/parallel_interleave_dataset_op.cc | 16 +- .../kernels/data/parallel_map_dataset_op.cc | 57 ++--- .../kernels/data/parallel_map_iterator.cc | 37 +++- .../core/kernels/data/parallel_map_iterator.h | 44 ++-- .../kernels/data/parse_example_dataset_op.cc | 185 +++++++--------- .../core/kernels/data/scan_dataset_op.cc | 8 +- 18 files changed, 395 insertions(+), 440 deletions(-) diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index 96ae8e16d5..b3ab7e2bc6 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -46,36 +46,10 @@ Status CapturedFunction::Create( return Status::OK(); } -Status CapturedFunction::Instantiate( - IteratorContext* ctx, std::unique_ptr* - instantiated_captured_function) { - // The context's runtime will be used for all subsequent calls. 
- FunctionLibraryRuntime* lib = ctx->lib(); - FunctionLibraryRuntime::InstantiateOptions inst_opts; - inst_opts.overlay_lib = ctx->function_library().get(); - inst_opts.state_handle = std::to_string(random::New64()); - inst_opts.create_kernels_eagerly = true; - if (!use_inter_op_parallelism_) { - inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR"; - } - - FunctionLibraryRuntime::Handle f_handle; - Status s = (lib->Instantiate(func_.name(), AttrSlice(&func_.attr()), - inst_opts, &f_handle)); - TF_RETURN_IF_ERROR(s); - const FunctionBody* fbody = lib->GetFunctionBody(f_handle); - if (fbody == nullptr) { - return errors::Internal("Failed to instantiate function body."); - } - - DataTypeVector ret_types; - for (const auto& ret_type : fbody->ret_types) { - ret_types.push_back(ret_type); +CapturedFunction::~CapturedFunction() { + if (lib_ != nullptr && f_handle_ != kInvalidHandle) { + lib_->ReleaseHandle(f_handle_).IgnoreError(); } - - instantiated_captured_function->reset(new InstantiatedCapturedFunction( - lib, f_handle, std::move(ret_types), *ctx->runner(), this)); - return Status::OK(); } namespace { @@ -198,34 +172,35 @@ class BorrowedArgsCallFrame : public CallFrameBase { } // namespace -InstantiatedCapturedFunction::InstantiatedCapturedFunction( - FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle, - DataTypeVector ret_types, std::function)> runner, - CapturedFunction* captured_func) - : lib_(lib), - f_handle_(f_handle), - ret_types_(std::move(ret_types)), - captured_runner_(std::move(runner)), - captured_func_(captured_func) {} - -InstantiatedCapturedFunction::~InstantiatedCapturedFunction() { - if (lib_ != nullptr && f_handle_ != kInvalidHandle) { - lib_->ReleaseHandle(f_handle_).IgnoreError(); +Status CapturedFunction::GetHandle(IteratorContext* ctx, + FunctionLibraryRuntime::Handle* out_handle) { + tf_shared_lock l(mu_); + if (lib_ == nullptr) { + return errors::Internal("Captured function \"", func_.name(), + "\" was called before it was 
instantiated."); } + if (ctx->lib() != lib_) { + return errors::Internal("Captured function \"", func_.name(), + "\" was called with a different " + "FunctionLibraryRuntime*, which is not permitted."); + } + *out_handle = f_handle_; + return Status::OK(); } -Status InstantiatedCapturedFunction::Run(IteratorContext* ctx, - std::vector&& args, - std::vector* rets) const { +Status CapturedFunction::Run(IteratorContext* ctx, std::vector&& args, + std::vector* rets) { + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(GetHandle(ctx, &handle)); + FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); - ScopedStepContainer step_container( - f_opts.step_id, [this](const string& name) { - lib_->device()->resource_manager()->Cleanup(name).IgnoreError(); - }); + f_opts.step_id = CapturedFunction::generate_step_id(); + ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) { + ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); f_opts.step_container = &step_container; f_opts.runner = ctx->runner(); - if (lib_->device()->device_type() != DEVICE_CPU) { + if (ctx->lib()->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -237,11 +212,10 @@ Status InstantiatedCapturedFunction::Run(IteratorContext* ctx, CancellationManager c_mgr; f_opts.cancellation_manager = &c_mgr; - OwnedArgsCallFrame frame(std::move(args), &captured_func_->captured_inputs(), - ret_types_); + OwnedArgsCallFrame frame(std::move(args), &captured_inputs_, ret_types_); Notification n; Status s; - lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) { + ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) { s.Update(func_status); n.Notify(); }); @@ -250,18 +224,20 @@ Status InstantiatedCapturedFunction::Run(IteratorContext* ctx, return frame.ConsumeRetvals(rets); } -Status 
InstantiatedCapturedFunction::RunWithBorrowedArgs( - IteratorContext* ctx, const std::vector& args, - std::vector* rets) const { +Status CapturedFunction::RunWithBorrowedArgs(IteratorContext* ctx, + const std::vector& args, + std::vector* rets) { + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(GetHandle(ctx, &handle)); + FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); - ScopedStepContainer step_container( - f_opts.step_id, [this](const string& name) { - lib_->device()->resource_manager()->Cleanup(name).IgnoreError(); - }); + f_opts.step_id = CapturedFunction::generate_step_id(); + ScopedStepContainer step_container(f_opts.step_id, [ctx](const string& name) { + ctx->lib()->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); f_opts.step_container = &step_container; f_opts.runner = ctx->runner(); - if (lib_->device()->device_type() != DEVICE_CPU) { + if (ctx->lib()->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -273,12 +249,11 @@ Status InstantiatedCapturedFunction::RunWithBorrowedArgs( CancellationManager c_mgr; f_opts.cancellation_manager = &c_mgr; - BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(), - ret_types_); + BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_); Notification n; Status s; - lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) { + ctx->lib()->Run(f_opts, handle, &frame, [&n, &s](Status func_status) { s.Update(func_status); n.Notify(); }); @@ -287,17 +262,65 @@ Status InstantiatedCapturedFunction::RunWithBorrowedArgs( return frame.ConsumeRetvals(rets); } -Status InstantiatedCapturedFunction::RunInstantiated( - const std::vector& args, std::vector* rets) { +Status CapturedFunction::Instantiate(IteratorContext* ctx) { + mutex_lock l(mu_); + if (lib_ == nullptr) { + // The context's runtime will be used for all 
subsequent calls. + lib_ = ctx->lib(); + DCHECK(f_handle_ == kInvalidHandle); + FunctionLibraryRuntime::InstantiateOptions inst_opts; + inst_opts.overlay_lib = ctx->function_library().get(); + inst_opts.state_handle = std::to_string(random::New64()); + inst_opts.create_kernels_eagerly = true; + if (!use_inter_op_parallelism_) { + inst_opts.executor_type = "SINGLE_THREADED_EXECUTOR"; + } + Status s = (lib_->Instantiate(func_.name(), AttrSlice(&func_.attr()), + inst_opts, &f_handle_)); + TF_RETURN_IF_ERROR(s); + const FunctionBody* fbody = lib_->GetFunctionBody(f_handle_); + if (fbody == nullptr) { + return errors::Internal("Failed to instantiate function body."); + } + ret_types_ = fbody->ret_types; + } else { + if (ctx->lib() != lib_) { + return errors::Internal( + "Captured function was called with a different " + "FunctionLibraryRuntime*, which is not permitted."); + } + } + if (captured_runner_ == nullptr) { + captured_runner_ = *ctx->runner(); + } + return Status::OK(); +} + +Status CapturedFunction::RunInstantiated(const std::vector& args, + std::vector* rets) { + FunctionLibraryRuntime* lib; + FunctionLibraryRuntime::Handle handle; + std::function)>* runner; + { + tf_shared_lock l(mu_); + if (lib_ == nullptr) { + return errors::FailedPrecondition( + "`CapturedFunction::Instantiate()` must be called before a call to " + "`CapturedFunction::RunInstantiated()`."); + } + lib = lib_; + handle = f_handle_; + runner = &captured_runner_; + } + FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); - ScopedStepContainer step_container( - f_opts.step_id, [this](const string& name) { - lib_->device()->resource_manager()->Cleanup(name).IgnoreError(); - }); + f_opts.step_id = CapturedFunction::generate_step_id(); + ScopedStepContainer step_container(f_opts.step_id, [lib](const string& name) { + lib->device()->resource_manager()->Cleanup(name).IgnoreError(); + }); f_opts.step_container = &step_container; - f_opts.runner 
= &captured_runner_; - if (lib_->device()->device_type() != DEVICE_CPU) { + f_opts.runner = runner; + if (lib->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -309,12 +332,11 @@ Status InstantiatedCapturedFunction::RunInstantiated( CancellationManager c_mgr; f_opts.cancellation_manager = &c_mgr; - BorrowedArgsCallFrame frame(args, &captured_func_->captured_inputs(), - ret_types_); + BorrowedArgsCallFrame frame(args, &captured_inputs_, ret_types_); Notification n; Status s; - lib_->Run(f_opts, f_handle_, &frame, [&n, &s](Status func_status) { + lib->Run(f_opts, handle, &frame, [&n, &s](Status func_status) { s.Update(func_status); n.Notify(); }); @@ -323,25 +345,33 @@ Status InstantiatedCapturedFunction::RunInstantiated( return frame.ConsumeRetvals(rets); } -void InstantiatedCapturedFunction::RunAsync( - IteratorContext* ctx, std::vector&& args, std::vector* rets, - FunctionLibraryRuntime::DoneCallback done, const string& prefix) const { +void CapturedFunction::RunAsync(IteratorContext* ctx, + std::vector&& args, + std::vector* rets, + FunctionLibraryRuntime::DoneCallback done, + const string& prefix) { // NOTE(mrry): This method does not transfer ownership of `ctx`, and it may // be deleted before `done` is called. Take care not to capture `ctx` in any // code that may execute asynchronously in this function. 
- auto frame = new OwnedArgsCallFrame( - std::move(args), &captured_func_->captured_inputs(), ret_types_); + FunctionLibraryRuntime::Handle handle; + Status s = GetHandle(ctx, &handle); + if (!s.ok()) { + done(s); + return; + } + auto frame = + new OwnedArgsCallFrame(std::move(args), &captured_inputs_, ret_types_); FunctionLibraryRuntime::Options f_opts; - f_opts.step_id = InstantiatedCapturedFunction::generate_step_id(); - ResourceMgr* resource_mgr = lib_->device()->resource_manager(); + f_opts.step_id = CapturedFunction::generate_step_id(); + ResourceMgr* resource_mgr = ctx->lib()->device()->resource_manager(); auto step_container = new ScopedStepContainer( f_opts.step_id, [resource_mgr](const string& name) { resource_mgr->Cleanup(name).IgnoreError(); }); f_opts.step_container = step_container; f_opts.runner = ctx->runner(); - if (lib_->device()->device_type() != DEVICE_CPU) { + if (ctx->lib()->device()->device_type() != DEVICE_CPU) { f_opts.create_rendezvous = true; } // TODO(mrry): Add cancellation manager support to IteratorContext @@ -396,13 +426,15 @@ void InstantiatedCapturedFunction::RunAsync( }, std::move(done), std::placeholders::_1); - lib_->Run(f_opts, f_handle_, frame, std::move(callback)); + ctx->lib()->Run(f_opts, handle, frame, std::move(callback)); } CapturedFunction::CapturedFunction(const NameAttrList& func, std::vector captured_inputs, bool use_inter_op_parallelism) : func_(func), + lib_(nullptr), + f_handle_(kInvalidHandle), captured_inputs_(std::move(captured_inputs)), use_inter_op_parallelism_(use_inter_op_parallelism) {} diff --git a/tensorflow/core/kernels/data/captured_function.h b/tensorflow/core/kernels/data/captured_function.h index 1b10725082..a10376bf97 100644 --- a/tensorflow/core/kernels/data/captured_function.h +++ b/tensorflow/core/kernels/data/captured_function.h @@ -34,41 +34,59 @@ class ResourceMgr; namespace data { -class CapturedFunction; - -// An InstantiatedCapturedFunction encapsulates all the runtime support needed -// 
to execute a tensorflow function. -// -// While CapturedFunction (below) encapsulates the more permanent attributes -// of the function i.e. name, captured arguments etc., -// InstantiatedCapturedFunction encapsulates the more runtime aspects i.e. -// FunctionLibraryRuntime, function handle etc. +// A `CapturedFunction` encapsulates a TensorFlow function and all of +// the runtime support required to execute it. // -// The `Iterator-`related classes use `InstantiatedCapturedFunction` to execute -// functions. -class InstantiatedCapturedFunction { +// The `Dataset`-related classes use `CapturedFunction` to execute +// TensorFlow functions outside a the normal `OpKernel::Compute()` +// context. +class CapturedFunction { public: - ~InstantiatedCapturedFunction(); + // Creates a new instance using a list of named attributes, fetching captured + // inputs from a context argument. + static Status Create(const NameAttrList& func, OpKernelContext* ctx, + const string& argument, + std::unique_ptr* out_function); - // Runs the "Instantiated Captured function". This method takes ownership of - // the tensors in `args`, in order to be able to deallocate them as early as + // Creates a new instance using a list of named attributes, fetching captured + // inputs from a context argument. + // + // If `use_inter_op_parallelism` is false, the runtime may use an executor + // that is optimized for small functions. + static Status Create(const NameAttrList& func, OpKernelContext* ctx, + const string& argument, bool use_inter_op_parallelism, + std::unique_ptr* out_function); + + ~CapturedFunction(); + + // Runs the "Captured function" using the given FLR and caches the lib and + // handle generated during instantiation. If Run is called with a different + // lib afterwards, generates an error. This method takes ownership of the + // tensors in `args`, in order to be able to deallocate them as early as // possible. 
Use `RunWithBorrowedArgs()` if the caller needs to retain // ownership of the `args`. Status Run(IteratorContext* ctx, std::vector&& args, - std::vector* rets) const; + std::vector* rets); // Synchronously runs the captured function on the given `args`, and stores // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when // possible. Status RunWithBorrowedArgs(IteratorContext* ctx, const std::vector& args, - std::vector* rets) const; + std::vector* rets); - // Synchronously runs the captured function on the given `args`, and stores - // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when - // possible. This can be useful for calling a captured + // Explicitly instantiate this function for use in the given + // context. This method, and the context-less overload + // `RunInstantiated()` below can be useful for calling a captured // function in cases where an `IteratorContext*` is not available // (such as a destructor). + Status Instantiate(IteratorContext* ctx); + + // Synchronously runs the captured function on the given `args`, and stores + // the results in `*rets`. Prefer to use `Run()` or `RunAsync()` when + // possible. + // + // REQUIRES: `this->Instantiate()` must have been called before this method. Status RunInstantiated(const std::vector& args, std::vector* rets); @@ -79,9 +97,16 @@ class InstantiatedCapturedFunction { void RunAsync(IteratorContext* ctx, std::vector&& args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done, - const string& prefix) const; + const string& prefix); + + // Returns the named list of function arguments. + const NameAttrList& func() { return func_; } - // Returns a step ID for use when running an `InstantiatedCapturedFunction`. + // Returns that additional captured inputs that will be passed to the function + // when `Run*()` is called. + const std::vector& captured_inputs() { return captured_inputs_; } + + // Returns a step ID for use when running a `CapturedFunction`. 
static int64 generate_step_id() { // Choose a step ID that is guaranteed not to clash with any // Session-generated step ID. DirectSession only generates @@ -91,65 +116,21 @@ class InstantiatedCapturedFunction { return -std::abs(static_cast(random::New64())); } - private: - InstantiatedCapturedFunction( - FunctionLibraryRuntime* lib, FunctionLibraryRuntime::Handle f_handle, - DataTypeVector ret_types, - std::function)> runner, - CapturedFunction* captured_func); - - friend class CapturedFunction; - - FunctionLibraryRuntime* const lib_; - const FunctionLibraryRuntime::Handle f_handle_; - const DataTypeVector ret_types_; - std::function)> captured_runner_; - CapturedFunction* const captured_func_; - - TF_DISALLOW_COPY_AND_ASSIGN(InstantiatedCapturedFunction); -}; - -// A `CapturedFunction` encapsulates a TensorFlow function. -// -// The `Dataset`-related classes use `CapturedFunction` to execute -// TensorFlow functions outside a the normal `OpKernel::Compute()` -// context. -class CapturedFunction { - public: - // Creates a new instance using a list of named attributes, fetching captured - // inputs from a context argument. - static Status Create(const NameAttrList& func, OpKernelContext* ctx, - const string& argument, - std::unique_ptr* out_function); - - // Creates a new instance using a list of named attributes, fetching captured - // inputs from a context argument. - // - // If `use_inter_op_parallelism` is false, the runtime may use an executor - // that is optimized for small functions. - static Status Create(const NameAttrList& func, OpKernelContext* ctx, - const string& argument, bool use_inter_op_parallelism, - std::unique_ptr* out_function); - - // Instantiates this function for use in the given context, providing an - // InstantiatedCapturedFunction that can be used to execute functions. - Status Instantiate(IteratorContext* ctx, - std::unique_ptr* - instantiated_captured_function); - - // Returns the named list of function arguments. 
- const NameAttrList& func() { return func_; } - - // Returns that additional captured inputs that will be passed to the function - const std::vector& captured_inputs() { return captured_inputs_; } - private: CapturedFunction(const NameAttrList& func, std::vector captured_inputs, bool use_inter_op_parallelism); + Status GetHandle(IteratorContext* ctx, + FunctionLibraryRuntime::Handle* out_handle); + + mutex mu_; const NameAttrList func_; + FunctionLibraryRuntime* lib_ GUARDED_BY(mu_); + FunctionLibraryRuntime::Handle f_handle_ GUARDED_BY(mu_); const std::vector captured_inputs_; + DataTypeSlice ret_types_; + std::function)> captured_runner_ = nullptr; const bool use_inter_op_parallelism_; TF_DISALLOW_COPY_AND_ASSIGN(CapturedFunction); diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index 36a1837295..e7ac368ae3 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -21,13 +21,12 @@ namespace data { Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, - int64 thread_index, - const InstantiatedCapturedFunction& instantiated_captured_func, - StringPiece prefix, std::unique_ptr* out_iterator) { + int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, + std::unique_ptr* out_iterator) { std::vector return_values; - TF_RETURN_IF_ERROR(instantiated_captured_func.RunWithBorrowedArgs( - ctx, input_element, &return_values)); + TF_RETURN_IF_ERROR( + captured_func->RunWithBorrowedArgs(ctx, input_element, &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT && TensorShapeUtils::IsScalar(return_values[0].shape()))) { diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 3de157b4bc..234856ea39 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -24,9 +24,8 
@@ namespace data { Status MakeIteratorFromInputElement( IteratorContext* ctx, const std::vector& input_element, - int64 thread_index, - const InstantiatedCapturedFunction& instantiated_captured_func, - StringPiece prefix, std::unique_ptr* out_iterator); + int64 thread_index, CapturedFunction* captured_func, StringPiece prefix, + std::unique_ptr* out_iterator); } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc index dfdc16f347..19c35f94a6 100644 --- a/tensorflow/core/kernels/data/filter_dataset_op.cc +++ b/tensorflow/core/kernels/data/filter_dataset_op.cc @@ -131,10 +131,9 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - virtual Status EvaluatePredicate( - IteratorContext* ctx, - InstantiatedCapturedFunction* instantiated_captured_function, - const std::vector& element, bool* out_matched) const = 0; + virtual Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const = 0; private: class Iterator : public DatasetIterator { @@ -145,8 +144,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } Status GetNextInternal(IteratorContext* ctx, @@ -173,8 +171,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - TF_RETURN_IF_ERROR(dataset()->EvaluatePredicate( - ctx, instantiated_captured_func_.get(), *out_tensors, &matched)); + TF_RETURN_IF_ERROR( + dataset()->EvaluatePredicate(ctx, *out_tensors, &matched)); if (!matched) { // Clear the output tensor list since it didn't match. 
out_tensors->clear(); @@ -208,7 +206,6 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { private: mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; @@ -223,15 +220,14 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { using FilterDatasetBase::FilterDatasetBase; protected: - Status EvaluatePredicate( - IteratorContext* ctx, - InstantiatedCapturedFunction* instantiated_captured_function, - const std::vector& element, bool* out_matched) const override { + Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const override { // TODO(mrry): Avoid blocking a threadpool thread. We will need to // stack-rip the iterators and use async kernels. std::vector result; - TF_RETURN_IF_ERROR(instantiated_captured_function->RunWithBorrowedArgs( - ctx, element, &result)); + TF_RETURN_IF_ERROR( + captured_func_->RunWithBorrowedArgs(ctx, element, &result)); if (result.size() != 1 || result[0].dtype() != DT_BOOL || result[0].NumElements() != 1) { @@ -253,10 +249,9 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { index_(index) {} protected: - Status EvaluatePredicate( - IteratorContext* ctx, - InstantiatedCapturedFunction* instantiated_captured_function, - const std::vector& element, bool* out_matched) const override { + Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const override { const Tensor& predicate = element[index_]; if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { return errors::InvalidArgument( diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc index 3af8162137..2fada22a21 100644 --- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc @@ -122,8 +122,7 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { Status 
Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } Status GetNextInternal(IteratorContext* ctx, @@ -239,7 +238,8 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { EXCLUSIVE_LOCKS_REQUIRED(mu_) { return MakeIteratorFromInputElement( ctx, captured_func_inputs_, element_index_++, - *instantiated_captured_func_, prefix(), ¤t_element_iterator_); + dataset()->captured_func_.get(), prefix(), + ¤t_element_iterator_); } Status BuildCurrentElementIteratorLocked(OpKernelContext* ctx) @@ -257,7 +257,6 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr input_impl_ GUARDED_BY(mu_); std::unique_ptr current_element_iterator_ GUARDED_BY(mu_); std::vector captured_func_inputs_ GUARDED_BY(mu_); - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/generator_dataset_op.cc b/tensorflow/core/kernels/data/generator_dataset_op.cc index c7d8cfce90..71a36314a0 100644 --- a/tensorflow/core/kernels/data/generator_dataset_op.cc +++ b/tensorflow/core/kernels/data/generator_dataset_op.cc @@ -73,8 +73,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { ~Iterator() override { if (!finalized_) { std::vector ignored; - Status s = - instantiated_finalize_func_->RunInstantiated(state_, &ignored); + Status s = dataset()->finalize_func_->RunInstantiated(state_, &ignored); if (!s.ok()) { LOG(WARNING) << "Error occurred when finalizing GeneratorDataset iterator: " @@ -84,14 +83,11 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { } Status Initialize(IteratorContext* ctx) override { + TF_RETURN_IF_ERROR(dataset()->init_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->next_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate(ctx)); 
TF_RETURN_IF_ERROR( - dataset()->init_func_->Instantiate(ctx, &instantiated_init_func_)); - TF_RETURN_IF_ERROR( - dataset()->next_func_->Instantiate(ctx, &instantiated_next_func_)); - TF_RETURN_IF_ERROR(dataset()->finalize_func_->Instantiate( - ctx, &instantiated_finalize_func_)); - TF_RETURN_IF_ERROR( - instantiated_init_func_->RunWithBorrowedArgs(ctx, {}, &state_)); + dataset()->init_func_->RunWithBorrowedArgs(ctx, {}, &state_)); return Status::OK(); } @@ -105,8 +101,8 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { return Status::OK(); } - Status s = instantiated_next_func_->RunWithBorrowedArgs(ctx, state_, - out_tensors); + Status s = + dataset()->next_func_->RunWithBorrowedArgs(ctx, state_, out_tensors); if (s.ok()) { *end_of_sequence = false; } else if (errors::IsOutOfRange(s)) { @@ -119,7 +115,7 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { // finalize function. std::vector ignored; TF_RETURN_IF_ERROR( - instantiated_finalize_func_->RunInstantiated(state_, &ignored)); + dataset()->finalize_func_->RunInstantiated(state_, &ignored)); finalized_ = true; } return s; @@ -129,9 +125,6 @@ class GeneratorDatasetOp::Dataset : public DatasetBase { mutex mu_; bool finalized_ GUARDED_BY(mu_) = false; std::vector state_ GUARDED_BY(mu_); - std::unique_ptr instantiated_init_func_; - std::unique_ptr instantiated_next_func_; - std::unique_ptr instantiated_finalize_func_; }; const std::unique_ptr init_func_; diff --git a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc index 9cfcbbf8f6..d6ee42a7c6 100644 --- a/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_reducer_dataset_op.cc @@ -192,14 +192,11 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - 
TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate( - ctx, &instantiated_key_func_)); - TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate( - ctx, &instantiated_init_func_)); - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate( - ctx, &instantiated_reduce_func_)); - TF_RETURN_IF_ERROR(dataset()->captured_finalize_func_->Instantiate( - ctx, &instantiated_finalize_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR( + dataset()->captured_finalize_func_->Instantiate(ctx)); return Status::OK(); } @@ -217,8 +214,9 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { if (!end_of_input_) { // Run the key function on the input element. std::vector key_func_output; - TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs( - ctx, next_input_element, &key_func_output)); + TF_RETURN_IF_ERROR( + dataset()->captured_key_func_->RunWithBorrowedArgs( + ctx, next_input_element, &key_func_output)); if (key_func_output.size() != 1 || key_func_output[0].dtype() != DT_INT64 || @@ -232,7 +230,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { if (states_.find(key) == states_.end()) { // Run the init function to create the initial state. 
std::vector init_func_output; - TF_RETURN_IF_ERROR(instantiated_init_func_->Run( + TF_RETURN_IF_ERROR(dataset()->captured_init_func_->Run( ctx, std::move(key_func_output), &init_func_output)); states_[key] = init_func_output; } @@ -246,7 +244,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { std::back_inserter(args)); std::vector reduce_func_output; - TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run( + TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run( ctx, std::move(args), &reduce_func_output)); states_[key] = reduce_func_output; } else { @@ -262,8 +260,9 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { *end_of_sequence = true; return Status::OK(); } - TF_RETURN_IF_ERROR(instantiated_finalize_func_->RunWithBorrowedArgs( - ctx, states_[keys_[keys_index_++]], out_tensors)); + TF_RETURN_IF_ERROR( + dataset()->captured_finalize_func_->RunWithBorrowedArgs( + ctx, states_[keys_[keys_index_++]], out_tensors)); *end_of_sequence = false; return Status::OK(); } @@ -381,10 +380,6 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { std::map> states_ GUARDED_BY(mu_); std::vector keys_ GUARDED_BY(mu_); int64 keys_index_ GUARDED_BY(mu_) = 0; - std::unique_ptr instantiated_key_func_; - std::unique_ptr instantiated_init_func_; - std::unique_ptr instantiated_reduce_func_; - std::unique_ptr instantiated_finalize_func_; }; const NameAttrList& key_func() const { return captured_key_func_->func(); } diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc index 2ea59bee5c..8b417bb1c2 100644 --- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc @@ -176,12 +176,10 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - 
TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate( - ctx, &instantiated_key_func_)); - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate( - ctx, &instantiated_reduce_func_)); - TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Instantiate( - ctx, &instantiated_window_size_func_)); + TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Instantiate(ctx)); + TF_RETURN_IF_ERROR( + dataset()->captured_window_size_func_->Instantiate(ctx)); return Status::OK(); } @@ -218,8 +216,9 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { // Run the key function on the input element to identify its // group. std::vector key_func_output; - TF_RETURN_IF_ERROR(instantiated_key_func_->RunWithBorrowedArgs( - ctx, next_input_element, &key_func_output)); + TF_RETURN_IF_ERROR( + dataset()->captured_key_func_->RunWithBorrowedArgs( + ctx, next_input_element, &key_func_output)); if (key_func_output.size() != 1 || key_func_output[0].dtype() != DT_INT64 || @@ -234,7 +233,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { // Run the window size function on the key to identify its // window size. 
std::vector window_size_func_output; - TF_RETURN_IF_ERROR(instantiated_window_size_func_->Run( + TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run( ctx, std::move(key_func_output), &window_size_func_output)); if (window_size_func_output.size() != 1 || @@ -449,8 +448,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { std::vector args( {std::move(key_arg), std::move(group_dataset_arg)}); std::vector return_values; - TF_RETURN_IF_ERROR(instantiated_reduce_func_->Run(ctx, std::move(args), - &return_values)); + TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run( + ctx, std::move(args), &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_VARIANT && @@ -479,10 +478,6 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { std::map>> groups_ GUARDED_BY(mu_); std::unique_ptr current_group_iterator_ GUARDED_BY(mu_); std::map window_sizes_ GUARDED_BY(mu_); - std::unique_ptr instantiated_key_func_; - std::unique_ptr instantiated_reduce_func_; - std::unique_ptr - instantiated_window_size_func_; }; Status OtherArgumentsNodeAndType( diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc index 91c298ce9a..0aa802b874 100644 --- a/tensorflow/core/kernels/data/interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc @@ -149,8 +149,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } void AdvanceToNextInCycle() EXCLUSIVE_LOCKS_REQUIRED(mu_) { @@ -196,7 +195,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { if (!end_of_input_) { TF_RETURN_IF_ERROR(MakeIteratorFromInputElement( ctx, args_list_[cycle_index_], cycle_index_, - 
*instantiated_captured_func_, prefix(), + dataset()->captured_func_.get(), prefix(), ¤t_elements_[cycle_index_])); ++num_open_; } @@ -282,7 +281,7 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { &args_list_[idx][i])); } TF_RETURN_IF_ERROR(MakeIteratorFromInputElement( - ctx, args_list_[idx], idx, *instantiated_captured_func_, + ctx, args_list_[idx], idx, dataset()->captured_func_.get(), prefix(), ¤t_elements_[idx])); TF_RETURN_IF_ERROR( RestoreInput(ctx, reader, current_elements_[idx])); @@ -302,7 +301,6 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel { int64 block_index_ GUARDED_BY(mu_) = 0; bool end_of_input_ GUARDED_BY(mu_) = false; size_t num_open_ GUARDED_BY(mu_) = 0; - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc index 71d3335452..83896219a3 100644 --- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc @@ -218,8 +218,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { } TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } Status GetNextInternal(IteratorContext* ctx, @@ -376,7 +375,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::vector input_element) { std::shared_ptr> return_values( new std::vector()); - instantiated_captured_func_->RunAsync( + dataset()->captured_func_->RunAsync( ctx.get(), std::move(input_element), return_values.get(), [this, ctx, result, return_values, offset](Status status) { Callback(ctx, result, return_values, offset, status); @@ -673,7 +672,6 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel { std::deque> batch_results_ GUARDED_BY(mu_); std::unique_ptr runner_thread_ 
GUARDED_BY(mu_); bool cancelled_ GUARDED_BY(mu_) = false; - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index 5b891b4fd5..f112e1dc43 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -122,8 +122,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } Status GetNextInternal(IteratorContext* ctx, @@ -143,7 +142,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { // TODO(mrry): Avoid blocking a threadpool thread. We will need to // stack-rip the iterators and use async kernels. Status s = - instantiated_captured_func_->Run(ctx, std::move(args), out_tensors); + dataset()->captured_func_->Run(ctx, std::move(args), out_tensors); if (errors::IsOutOfRange(s)) { // `f` may deliberately raise `errors::OutOfRange` to indicate // that we should terminate the iteration early. 
@@ -168,7 +167,6 @@ class MapDatasetOp : public UnaryDatasetOpKernel { private: std::unique_ptr input_impl_; - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 448cc93a8c..9cd46bf5dd 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -247,8 +247,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { AddConstantParameter(ctx, "parallelism", dataset()->cycle_length_); TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } // It is implemented so that it matches the deterministic interleave @@ -686,7 +685,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { worker_thread_states_[thread_index].iterator_creation_status = MakeIteratorFromInputElement( ctx.get(), worker_thread_states_[thread_index].input, - thread_index, *instantiated_captured_func_, prefix(), + thread_index, dataset()->captured_func_.get(), prefix(), &worker_thread_states_[thread_index].iterator); iterator_creation_status = worker_thread_states_[thread_index].iterator_creation_status; @@ -920,7 +919,7 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr iterator; Status s = MakeIteratorFromInputElement( ctx, worker_thread_states_[index].input, index, - *instantiated_captured_func_, prefix(), &iterator); + dataset()->captured_func_.get(), prefix(), &iterator); TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, iterator)); worker_thread_states_[index].iterator.swap(iterator); } @@ -1048,7 +1047,6 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel { // threads have exited before any other 
members are deallocated. // TODO(b/65178177): Avoid allocating additional threads. std::vector> worker_threads_ GUARDED_BY(mu_); - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; @@ -1256,8 +1254,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { AddConstantParameter(ctx, "cycle_length", dataset()->cycle_length_); TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } Status GetNextInternal(IteratorContext* ctx, @@ -1493,7 +1490,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { if (!end_of_input_) { Status status = MakeIteratorFromInputElement( ctx.get(), args_list_[cycle_index_], cycle_index_, - *instantiated_captured_func_, prefix(), + dataset()->captured_func_.get(), prefix(), ¤t_elements_[cycle_index_]); if (!status.ok()) { invocation_results_.emplace_back(new InvocationResult()); @@ -1602,7 +1599,7 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { &args_list_[idx][i])); } TF_RETURN_IF_ERROR(MakeIteratorFromInputElement( - ctx, args_list_[idx], idx, *instantiated_captured_func_, + ctx, args_list_[idx], idx, dataset()->captured_func_.get(), prefix(), ¤t_elements_[idx])); TF_RETURN_IF_ERROR( RestoreInput(ctx, reader, current_elements_[idx])); @@ -1662,7 +1659,6 @@ class ParallelInterleaveDatasetV2Op : public UnaryDatasetOpKernel { // Identifies whether background activity should be cancelled. 
bool cancelled_ GUARDED_BY(mu_) = false; - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 822f06be9e..6abe6c8338 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -85,11 +85,29 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { + auto init_func = [this](IteratorContext* ctx) { + return captured_func_->Instantiate(ctx); + }; + const string& new_prefix = strings::StrCat(prefix, "::ParallelMap"); - std::unique_ptr parallel_map_dataset_functor( - new ParallelMapDatasetFunctor(this, new_prefix)); + ParallelMapIteratorFunction map_func = + [this, new_prefix](IteratorContext* ctx, + std::vector input_element, + std::vector* result, StatusCallback done) { + captured_func_->RunAsync(ctx, std::move(input_element), result, + std::move(done), new_prefix); + }; + if (!use_inter_op_parallelism_) { + map_func = [map_func]( + IteratorContext* ctx, std::vector input_element, + std::vector* result, StatusCallback done) { + (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element), + result, std::move(done))); + }; + } + return NewParallelMapIterator({this, new_prefix}, input_, - std::move(parallel_map_dataset_functor), + std::move(init_func), std::move(map_func), num_parallel_calls_); } @@ -151,39 +169,6 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { } private: - class ParallelMapDatasetFunctor : public ParallelMapFunctor { - public: - ParallelMapDatasetFunctor(const Dataset* dataset, const string& prefix) - : dataset_(dataset), prefix_(prefix) {} - - Status InitFunc(IteratorContext* ctx) override { - return dataset_->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); - } - - void MapFunc(IteratorContext* ctx, 
std::vector input_element, - std::vector* result, StatusCallback done) override { - auto map_func = [this](IteratorContext* ctx, - std::vector input_element, - std::vector* result, - StatusCallback done) { - instantiated_captured_func_->RunAsync( - ctx, std::move(input_element), result, std::move(done), prefix_); - }; - if (!dataset_->use_inter_op_parallelism_) { - (*ctx->runner())(std::bind(map_func, ctx, std::move(input_element), - result, std::move(done))); - } else { - map_func(ctx, std::move(input_element), result, std::move(done)); - } - } - - private: - const Dataset* dataset_; - const string prefix_; - std::unique_ptr instantiated_captured_func_; - }; - const DatasetBase* const input_; const NameAttrList func_; const int32 num_parallel_calls_; diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc index 4f8e0489de..5f6052ce83 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.cc +++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc @@ -31,11 +31,12 @@ class ParallelMapIterator : public DatasetBaseIterator { explicit ParallelMapIterator( const typename DatasetBaseIterator::BaseParams& params, const DatasetBase* input_dataset, - std::unique_ptr parallel_map_functor, - int32 num_parallel_calls) + std::function init_func, + ParallelMapIteratorFunction map_func, int32 num_parallel_calls) : DatasetBaseIterator(params), input_dataset_(input_dataset), - parallel_map_functor_(std::move(parallel_map_functor)), + init_func_(std::move(init_func)), + map_func_(std::move(map_func)), num_parallel_calls_(num_parallel_calls) {} ~ParallelMapIterator() override { @@ -76,7 +77,10 @@ class ParallelMapIterator : public DatasetBaseIterator { } TF_RETURN_IF_ERROR( input_dataset_->MakeIterator(ctx, prefix(), &input_impl_)); - return parallel_map_functor_->InitFunc(ctx); + if (init_func_) { + TF_RETURN_IF_ERROR(init_func_(ctx)); + } + return Status::OK(); } Status GetNextInternal(IteratorContext* 
ctx, std::vector* out_tensors, @@ -222,8 +226,8 @@ class ParallelMapIterator : public DatasetBaseIterator { CallCompleted(result); }; - parallel_map_functor_->MapFunc(ctx.get(), std::move(input_element), - &result->return_values, std::move(done)); + map_func_(ctx.get(), std::move(input_element), &result->return_values, + std::move(done)); } Status ProcessResult(const std::shared_ptr& result, @@ -319,7 +323,8 @@ class ParallelMapIterator : public DatasetBaseIterator { } const DatasetBase* const input_dataset_; // Not owned. - std::unique_ptr parallel_map_functor_; + const std::function init_func_; + const ParallelMapIteratorFunction map_func_; // Used for coordination between the main thread and the runner thread. mutex mu_; // Used for coordination between the main thread and the runner thread. In @@ -344,12 +349,20 @@ class ParallelMapIterator : public DatasetBaseIterator { std::unique_ptr NewParallelMapIterator( const DatasetBaseIterator::BaseParams& params, - const DatasetBase* input_dataset, - std::unique_ptr parallel_map_functor, + const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func, int32 num_parallel_calls) { - return std::unique_ptr(new ParallelMapIterator( - params, input_dataset, std::move(parallel_map_functor), - num_parallel_calls)); + return NewParallelMapIterator(params, input_dataset, nullptr, + std::move(map_func), num_parallel_calls); +} + +std::unique_ptr NewParallelMapIterator( + const DatasetBaseIterator::BaseParams& params, + const DatasetBase* input_dataset, + std::function init_func, + ParallelMapIteratorFunction map_func, int32 num_parallel_calls) { + return std::unique_ptr( + new ParallelMapIterator(params, input_dataset, std::move(init_func), + std::move(map_func), num_parallel_calls)); } } // namespace data diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.h b/tensorflow/core/kernels/data/parallel_map_iterator.h index 62e57e5335..dc26c5cf25 100644 --- a/tensorflow/core/kernels/data/parallel_map_iterator.h 
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.h @@ -22,32 +22,30 @@ limitations under the License. namespace tensorflow { namespace data { -class ParallelMapFunctor { - public: - virtual ~ParallelMapFunctor() {} - - // A function that runs when the Iterator is initialized. It enables the user - // to specify error checking logic that can fail early. - virtual Status InitFunc(IteratorContext* ctx) { return Status::OK(); } - - // A function that transforms elements of one dataset into another - // asynchronously. The arguments are: - // 1. An `IteratorContext*` for the context in which the function should - // execute. - // 2. A `std::vector` containing the input element. - // 3. A `std::vector*` to which the function will write the result. - // 4. A `StatusCallback` that should be invoked when the function is complete. - virtual void MapFunc(IteratorContext* ctx, std::vector input, - std::vector* output, - StatusCallback callback) = 0; -}; - -// Returns a new iterator that uses `parallel_map_functor` to apply `MapFunc` -// to the elements of `input_dataset` using the given degree of parallelism. +// A function that transforms elements of one dataset into another +// asynchronously. The arguments are: +// 1. An `IteratorContext*` for the context in which the function should +// execute. +// 2. A `std::vector` containing the input element. +// 3. A `std::vector*` to which the function will write the result. +// 4. A `StatusCallback` that should be invoked when the function is complete. +using ParallelMapIteratorFunction = + std::function, + std::vector*, StatusCallback)>; + +// Returns a new iterator that applies `map_func` to the elements of +// `input_dataset` using the given degree of parallelism. `init_func` (if +// specified) will be executed when the iterator is initialized (see +// `IteratorBase::Initialize()`) and enables the user to specify error checking +// logic that can fail early. 
std::unique_ptr NewParallelMapIterator( const DatasetBaseIterator::BaseParams& params, const DatasetBase* input_dataset, - std::unique_ptr parallel_map_functor, + std::function init_func, + ParallelMapIteratorFunction map_func, int32 num_parallel_calls); +std::unique_ptr NewParallelMapIterator( + const DatasetBaseIterator::BaseParams& params, + const DatasetBase* input_dataset, ParallelMapIteratorFunction map_func, int32 num_parallel_calls); } // namespace data diff --git a/tensorflow/core/kernels/data/parse_example_dataset_op.cc b/tensorflow/core/kernels/data/parse_example_dataset_op.cc index 32210ef677..c28c06da62 100644 --- a/tensorflow/core/kernels/data/parse_example_dataset_op.cc +++ b/tensorflow/core/kernels/data/parse_example_dataset_op.cc @@ -182,80 +182,9 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { std::unique_ptr MakeIteratorInternal( const string& prefix) const override { - std::unique_ptr parse_example_functor( - new ParseExampleFunctor(this)); - return NewParallelMapIterator( - {this, strings::StrCat(prefix, "::ParseExample")}, input_, - std::move(parse_example_functor), num_parallel_calls_); - } - - const DataTypeVector& output_dtypes() const override { - return output_types_; - } - - const std::vector& output_shapes() const override { - return output_shapes_; - } - - string DebugString() const override { - return "ParseExampleDatasetOp::Dataset"; - } - - protected: - Status AsGraphDefInternal(SerializationContext* ctx, - DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); - - Node* num_parallle_calls_node; - std::vector dense_defaults_nodes; - dense_defaults_nodes.reserve(dense_defaults_.size()); - - TF_RETURN_IF_ERROR( - b->AddScalar(num_parallel_calls_, &num_parallle_calls_node)); - - for (const Tensor& dense_default : dense_defaults_) { - Node* node; - TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node)); - 
dense_defaults_nodes.emplace_back(node); - } - - AttrValue sparse_keys_attr; - AttrValue dense_keys_attr; - AttrValue sparse_types_attr; - AttrValue dense_attr; - AttrValue dense_shapes_attr; - - b->BuildAttrValue(sparse_keys_, &sparse_keys_attr); - b->BuildAttrValue(dense_keys_, &dense_keys_attr); - b->BuildAttrValue(sparse_types_, &sparse_types_attr); - b->BuildAttrValue(dense_types_, &dense_attr); - b->BuildAttrValue(dense_shapes_, &dense_shapes_attr); - - TF_RETURN_IF_ERROR(b->AddDataset(this, - { - {0, input_graph_node}, - {1, num_parallle_calls_node}, - }, - {{2, dense_defaults_nodes}}, - {{"sparse_keys", sparse_keys_attr}, - {"dense_keys", dense_keys_attr}, - {"sparse_types", sparse_types_attr}, - {"Tdense", dense_attr}, - {"dense_shapes", dense_shapes_attr}}, - output)); - return Status::OK(); - } - - private: - class ParseExampleFunctor : public ParallelMapFunctor { - public: - explicit ParseExampleFunctor(const Dataset* dataset) - : dataset_(dataset) {} - - void MapFunc(IteratorContext* ctx, std::vector input_element, - std::vector* result, StatusCallback done) override { + auto map_fn = [this](IteratorContext* ctx, + std::vector input_element, + std::vector* result, StatusCallback done) { (*ctx->runner())([this, ctx, input_element, result, done]() { thread::ThreadPool* device_threadpool = ctx->lib()->device()->tensorflow_cpu_worker_threads()->workers; @@ -267,7 +196,7 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { for (auto it = slice.begin(); it != slice.end(); it++) slice_vec.push_back(*it); } - example::FastParseExampleConfig config = dataset_->config_; + example::FastParseExampleConfig config = config_; // local copy of config_ for modification. 
auto stats_aggregator = ctx->stats_aggregator(); if (stats_aggregator) { @@ -277,50 +206,43 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { Status s = FastParseExample(config, slice_vec, {}, device_threadpool, &example_result); if (s.ok()) { - (*result).resize(dataset_->key_to_output_index_.size()); - for (int d = 0; d < dataset_->dense_keys_.size(); ++d) { - int output_index = - dataset_->key_to_output_index_.at(dataset_->dense_keys_[d]); - DCHECK(example_result.dense_values[d].dtype() == - dataset_->output_dtypes()[output_index]) + (*result).resize(key_to_output_index_.size()); + for (int d = 0; d < dense_keys_.size(); ++d) { + int output_index = key_to_output_index_.at(dense_keys_[d]); + CHECK(example_result.dense_values[d].dtype() == + output_dtypes()[output_index]) << "Got wrong type for FastParseExample return value " << d << " (expected " - << DataTypeString(dataset_->output_dtypes()[output_index]) - << ", got " + << DataTypeString(output_dtypes()[output_index]) << ", got " << DataTypeString(example_result.dense_values[d].dtype()) << ")."; - DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith( + CHECK(output_shapes()[output_index].IsCompatibleWith( example_result.dense_values[d].shape())) << "Got wrong shape for FastParseExample return value " << d << " (expected " - << dataset_->output_shapes()[output_index].DebugString() - << ", got " + << output_shapes()[output_index].DebugString() << ", got " << example_result.dense_values[d].shape().DebugString() << ")."; (*result)[output_index] = example_result.dense_values[d]; } - for (int d = 0; d < dataset_->sparse_keys_.size(); ++d) { + for (int d = 0; d < sparse_keys_.size(); ++d) { Tensor serialized_sparse = Tensor(DT_VARIANT, TensorShape({3})); auto serialized_sparse_t = serialized_sparse.vec(); serialized_sparse_t(0) = example_result.sparse_indices[d]; serialized_sparse_t(1) = example_result.sparse_values[d]; serialized_sparse_t(2) = example_result.sparse_shapes[d]; - int output_index 
= - dataset_->key_to_output_index_.at(dataset_->sparse_keys_[d]); - DCHECK(serialized_sparse.dtype() == - dataset_->output_dtypes()[output_index]) + int output_index = key_to_output_index_.at(sparse_keys_[d]); + CHECK(serialized_sparse.dtype() == output_dtypes()[output_index]) << "Got wrong type for FastParseExample return value " << d << " (expected " - << DataTypeString(dataset_->output_dtypes()[output_index]) - << ", got " << DataTypeString(serialized_sparse.dtype()) - << ")."; - DCHECK(dataset_->output_shapes()[output_index].IsCompatibleWith( + << DataTypeString(output_dtypes()[output_index]) << ", got " + << DataTypeString(serialized_sparse.dtype()) << ")."; + CHECK(output_shapes()[output_index].IsCompatibleWith( serialized_sparse.shape())) << "Got wrong shape for FastParseExample return value " << d << " (expected " - << dataset_->output_shapes()[output_index].DebugString() - << ", got " << serialized_sparse.shape().DebugString() - << ")."; + << output_shapes()[output_index].DebugString() << ", got " + << serialized_sparse.shape().DebugString() << ")."; (*result)[output_index] = serialized_sparse; } // TODO(b/111553342): User provided tags instead of fixed tag. 
@@ -346,12 +268,73 @@ class ParseExampleDatasetOp : public UnaryDatasetOpKernel { } done(s); }); + }; + + return NewParallelMapIterator( + {this, strings::StrCat(prefix, "::ParseExample")}, input_, + std::move(map_fn), num_parallel_calls_); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { + return "ParseExampleDatasetOp::Dataset"; + } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); + + Node* num_parallle_calls_node; + std::vector dense_defaults_nodes; + dense_defaults_nodes.reserve(dense_defaults_.size()); + + TF_RETURN_IF_ERROR( + b->AddScalar(num_parallel_calls_, &num_parallle_calls_node)); + + for (const Tensor& dense_default : dense_defaults_) { + Node* node; + TF_RETURN_IF_ERROR(b->AddTensor(dense_default, &node)); + dense_defaults_nodes.emplace_back(node); } - private: - const Dataset* dataset_; - }; + AttrValue sparse_keys_attr; + AttrValue dense_keys_attr; + AttrValue sparse_types_attr; + AttrValue dense_attr; + AttrValue dense_shapes_attr; + + b->BuildAttrValue(sparse_keys_, &sparse_keys_attr); + b->BuildAttrValue(dense_keys_, &dense_keys_attr); + b->BuildAttrValue(sparse_types_, &sparse_types_attr); + b->BuildAttrValue(dense_types_, &dense_attr); + b->BuildAttrValue(dense_shapes_, &dense_shapes_attr); + + TF_RETURN_IF_ERROR(b->AddDataset(this, + { + {0, input_graph_node}, + {1, num_parallle_calls_node}, + }, + {{2, dense_defaults_nodes}}, + {{"sparse_keys", sparse_keys_attr}, + {"dense_keys", dense_keys_attr}, + {"sparse_types", sparse_types_attr}, + {"Tdense", dense_attr}, + {"dense_shapes", dense_shapes_attr}}, + output)); + return Status::OK(); + } + private: const DatasetBase* const input_; 
const std::vector dense_defaults_; const std::vector sparse_keys_; diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc index d9fdd59bf0..dbe31f37b8 100644 --- a/tensorflow/core/kernels/data/scan_dataset_op.cc +++ b/tensorflow/core/kernels/data/scan_dataset_op.cc @@ -144,8 +144,7 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { Status Initialize(IteratorContext* ctx) override { TF_RETURN_IF_ERROR( dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); - return dataset()->captured_func_->Instantiate( - ctx, &instantiated_captured_func_); + return dataset()->captured_func_->Instantiate(ctx); } Status GetNextInternal(IteratorContext* ctx, @@ -170,8 +169,8 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { state_and_output.reserve(dataset()->state_types_.size() + output_dtypes().size()); - Status s = instantiated_captured_func_->Run(ctx, std::move(args), - &state_and_output); + Status s = dataset()->captured_func_->Run(ctx, std::move(args), + &state_and_output); if (s.ok()) { state_.clear(); size_t i = 0; @@ -248,7 +247,6 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { mutex mu_; std::unique_ptr input_impl_ GUARDED_BY(mu_); std::vector state_ GUARDED_BY(mu_); - std::unique_ptr instantiated_captured_func_; }; const DatasetBase* const input_; -- GitLab From 6c8f6920e8bad10429ac0b88abbe0ace5a5e9a72 Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Tue, 18 Sep 2018 15:27:47 -0700 Subject: [PATCH 0342/1357] Updates documentation of Estimator.predict to note that an issue with yielding and graph context. 
PiperOrigin-RevId: 213528782 --- tensorflow/python/estimator/estimator.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index ff2baa0465..ffe1e30da0 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -490,6 +490,10 @@ class Estimator(object): yield_single_examples=True): """Yields predictions for given features. + Please note that interleaving two predict outputs does not work. See: + [issue/20506]( + https://github.com/tensorflow/tensorflow/issues/20506#issuecomment-422208517) + Args: input_fn: A function that constructs the features. Prediction continues until `input_fn` raises an end-of-input exception -- GitLab From e1a32c98210f8ebba42a0397259d948e1433c09e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 18 Sep 2018 15:42:44 -0700 Subject: [PATCH 0343/1357] "Isolate" must-be-constant side effecting operations I first tried to fix this issue in cr/209996730 but didn't quite fix the problem for XLA_* devices. A node assigned to an XLA_* device must be compiled so the cr/209996730 fix of simply not compiling the nodes doesn't generalize to XLA_* devices. Instead we now "isolate" these nodes, only putting them in a trivial one-node cluster. For non-XLA devices even this trivial cluster is ignored because of flags->tf_xla_min_cluster_size. I was initially considering a more principled data-flow-analysis based solution but then decided the upfront work isn't worth it until I see a clear motivating example. 
PiperOrigin-RevId: 213531437 --- .../compiler/jit/mark_for_compilation_pass.cc | 73 ++++++++++++++++--- .../jit/mark_for_compilation_pass_test.cc | 66 +++++++++++++++++ .../mark_for_compilation_pass_test_helper.cc | 21 +++++- 3 files changed, 147 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index e6cc6e52ae..1eaedbfbfb 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -365,10 +365,13 @@ bool IsXlaFusable(const NodeDef& node) { return elementwise_ops->count(node.op()) > 0; } +// Nodes that XLA can compile are put in `candidates`. Nodes put in +// `isolated_nodes` must either be unclustered or be put in trivial single-node +// clusters. Status FindCompilationCandidates( const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env, const std::function& is_compilable_fn, - OrderedNodeSet* candidates) { + OrderedNodeSet* candidates, gtl::FlatSet* isolated_nodes) { OptimizerOptions opts; std::unique_ptr pflr( new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION, @@ -411,6 +414,8 @@ Status FindCompilationCandidates( DeviceType device_type(""); TF_RETURN_IF_ERROR( DeviceToDeviceType(node->assigned_device_name(), &device_type)); + VLOG(4) << "Device type for " << node->name() << ": " + << device_type.type_string(); if (is_compilable_fn && !is_compilable_fn(node, device_type)) { // is_compilable_fn has already logged the reason if it returned false. 
@@ -439,19 +444,56 @@ Status FindCompilationCandidates( << node->type_string(); continue; } - if (compile_time_const_nodes[node->id()] && - !registration->requires_compilation) { + if (compile_time_const_nodes[node->id()]) { const OpDef* op_def; TF_RETURN_IF_ERROR( graph.op_registry()->LookUpOpDef(node->type_string(), &op_def)); if (op_def->is_stateful()) { - // We need to be able to constant fold the nodes in - // compile_time_const_nodes given constant inputs (required by XLA) and - // therefore can't auto-cluster stateful ops since these can never be - // constant folded. - VLOG(2) << "Rejecting " << node->name() - << ": must-be-constant stateful op"; - continue; + // It is easiest to demonstrate the problem we're trying to solve with + // an example. Say we have this graph: + // + // shape = RandomUniformInt(); + // reshape = Reshape(input, shape) + // + // Both RandomUniformInt and Reshape are compilable by XLA so, absent + // any other reason, we will try to put both shape and reshape in the + // same cluster. However, since XLA only supports statically shaped + // values, it will expect to be able to constant fold `shape` to get a + // static shape for `reshape`. This is a problem because side-effecting + // ops like RandomUniformInt() cannot be constant folded. We fix this + // by putting `shape` and `reshape` in different clusters, which results + // in us recompiling `reshape`'s cluster for every new value of `shape`, + // making `reshape` statically sized within each compilation. We + // simplify the solution even further by disallowing operations like + // `shape` from being part of *any* non-trivial cluster. They're either + // not compiled by XLA altogether or, if assigned to an XLA_* device + // with "must compile" semantics, compiled into a trivial single-op + // cluster. This approach leaves some room for improvement, and we can + // consider implementing a more aggressive data-flow-analysis based + // solution in the future if needed. 
+ // + // One ugly problem we have to contend with: certain sets of ops *have* + // to be in the same cluster because values flowing between them have + // types that can't be live-in or live-out of a cluster. These ops are: + // + // - TensorArray ops operating on the same TensorArray instance. + // - Stack ops operating on the same Stack instance. + // + // To work around this we avoid isolating these specific ops. Because + // of this concession it is unsound to auto-cluster them because then + // we'd create clusters we could not compile (because we can't constant + // fold, say, a TensorArrayRead or a StackPopV2). But we don't + // auto-cluster these operations today so we're good for now. + const XlaResourceOpInfo* op_info = + GetResourceOpInfoForOp(node->type_string()); + bool is_tensor_array_or_stack_op = + op_info && op_info->resource_kind() != XlaResourceKind::kVariable; + if (!is_tensor_array_or_stack_op) { + VLOG(2) << "Isolating " << node->name() + << ": must-be-constant stateful op"; + isolated_nodes->insert(node); + // Keep going and execute all the other checks. + } } } // We don't auto-cluster functional control flow nodes containing resource @@ -807,11 +849,12 @@ Status MarkForCompilationPass::RunImpl( Graph* graph = options.graph->get(); OrderedNodeSet compilation_candidates; + gtl::FlatSet isolated_nodes; TF_RETURN_IF_ERROR(FindCompilationCandidates( *graph, options.flib_def, (options.session_options != nullptr) ? 
options.session_options->env : Env::Default(), - is_compilable_fn, &compilation_candidates)); + is_compilable_fn, &compilation_candidates, &isolated_nodes)); if (compilation_candidates.empty()) { VLOG(2) << "No compilable candidates"; @@ -856,6 +899,11 @@ Status MarkForCompilationPass::RunImpl( "Found control flow node in clustering worklist: ", node_from->type_string()); } + + if (isolated_nodes.count(node_from)) { + continue; + } + string from_scope; string to_scope; for (int to : cycles.Successors(from)) { @@ -873,6 +921,9 @@ Status MarkForCompilationPass::RunImpl( node_to->assigned_device_name()) { continue; } + if (isolated_nodes.count(node_to)) { + continue; + } // Look for an _XlaScope on both nodes. If both nodes have a // scope and the scopes do not match, do not cluster along this // edge. This restriction is overridden if the global_jit_level is ON. If diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index c59770a4c8..4f9145b479 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -894,5 +894,71 @@ TEST(XlaCompilationTest, RandomShapeWithFunc) { EXPECT_EQ(clusters["fn_call"], ""); } +TEST(XlaCompilationTest, RandomShapeOnXlaDevice) { + absl::string_view xla_gpu_device = + "/job:worker/replica:0/task:0/device:XLA_GPU:0"; + + Scope root = Scope::NewRootScope().ExitOnError(); + Output shape_shape = + ops::Const(root.WithOpName("test/shape_shape"), {2}, {1}); + Output shape = + ops::RandomUniformInt(root.WithOpName("test/shape_rng"), shape_shape, + ops::Const(root.WithOpName("test/minval"), 1), + ops::Const(root.WithOpName("test/maxval"), 20)); + Output reshape_input = + ops::Placeholder(root.WithOpName("test/reshape_input"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({500, 500}))); + Output reshape = + ops::Reshape(root.WithOpName("test/reshape"), reshape_input, shape); + + 
std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + for (Node* n : graph->nodes()) { + if (absl::StartsWith(n->name(), /*prefix=*/"test/")) { + n->set_assigned_device_name(string(xla_gpu_device)); + } + } + TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph)); + + std::unordered_map clusters = GetClusters(*graph); + EXPECT_NE(clusters["test/shape_rng"], ""); + EXPECT_NE(clusters["test/reshape"], ""); + EXPECT_NE(clusters["test/shape_rng"], clusters["test/reshape"]); +} + +TEST(XlaCompilationTest, TensorArrayShapeOnXlaDevice) { + absl::string_view xla_gpu_device = + "/job:worker/replica:0/task:0/device:XLA_GPU:0"; + Scope root = Scope::NewRootScope().ExitOnError(); + ops::TensorArray tensor_array(root.WithOpName("test/tensor_array"), 1, + DT_INT32); + Output zero = ops::Const(root.WithOpName("test/zero"), 0); + ops::TensorArrayWrite tensor_array_write( + root.WithOpName("test/write"), tensor_array.handle, zero, + ops::Const(root.WithOpName("test/forty_two"), 42.0f), tensor_array.flow); + Output tensor_array_read = + ops::TensorArrayRead(root.WithOpName("test/read"), tensor_array.handle, + zero, tensor_array_write.flow_out, DT_INT32); + Output reshape = + ops::Reshape(root.WithOpName("test/reshape"), + ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT), + tensor_array_read); + + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(root.ToGraph(graph.get())); + + for (Node* n : graph->nodes()) { + if (absl::StartsWith(n->name(), /*prefix=*/"test/")) { + n->set_assigned_device_name(string(xla_gpu_device)); + } + } + TF_ASSERT_OK(MarkForCompilationPassTestHelper::MarkForCompilation(&graph)); + + std::unordered_map clusters = GetClusters(*graph); + EXPECT_NE(clusters["test/read"], ""); + EXPECT_EQ(clusters["test/read"], clusters["test/reshape"]); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc 
b/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc index 65669877f7..d56d0f8ccf 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.cc @@ -14,18 +14,35 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/jit/mark_for_compilation_pass_test_helper.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { /*static*/ Status MarkForCompilationPassTestHelper::MarkForCompilation( std::unique_ptr* graph, FunctionLibraryDefinition* flib_def, SessionOptions* session_options) { - // Assign all nodes to the CPU device. + // Assign all unassigned nodes to the CPU device. static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0"; for (Node* n : (*graph)->nodes()) { - n->set_assigned_device_name(kCpuDevice); + if (n->assigned_device_name().empty()) { + n->set_assigned_device_name(kCpuDevice); + } } + // Call AddDevices to register the XLA devices. + // + // It may be worth refactoring out XlaOpRegistry::RegisterCompilationDevice to + // make this more direct, but probably not worth it solely for this test. + std::vector devices; + TF_RETURN_IF_ERROR(DeviceFactory::AddDevices(*session_options, "", &devices)); + + auto delete_devices = gtl::MakeCleanup([&] { + for (Device* d : devices) { + delete d; + } + }); + GraphOptimizationPassOptions opt_options; opt_options.graph = graph; opt_options.session_options = session_options; -- GitLab From 073c418695ac9ef02071de3e08394e781ceca117 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 18 Sep 2018 16:10:38 -0700 Subject: [PATCH 0344/1357] Convert more kernel signatures to use runtime shapes. 
PiperOrigin-RevId: 213536334 --- .../internal/optimized/optimized_ops.h | 2 - .../internal/reference/reference_ops.h | 277 +++++++++++++----- .../contrib/lite/kernels/internal/types.h | 14 +- 3 files changed, 210 insertions(+), 83 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 2fa5d6445e..6f4e135c94 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2210,7 +2210,6 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr, TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4); const int batch_size = input_shape.Dims(0); const int filter_width = filter_shape.Dims(2); @@ -2376,7 +2375,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(im2col_shape.DimensionsCount(), 4); const uint8* gemm_input_data = nullptr; const RuntimeShape* gemm_input_shape = nullptr; diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 09a4ba7701..87bcc8c219 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -163,28 +163,38 @@ SaturatingRoundingMultiplyByPOTParam( SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); } -inline void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& 
bias_dims, - int stride_width, int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims, - float* im2col_data, const Dims<4>& im2col_dims) { +inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, + float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + (void)im2col_data; // only used in optimized code. - (void)im2col_dims; // only used in optimized code. - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); - const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); + (void)im2col_shape; // only used in optimized code. 
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); if (bias_data) { - TFLITE_DCHECK_EQ(ArraySize(filter_dims, 3), ArraySize(bias_dims, 0)); - } - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -202,11 +212,11 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, // use zero as a default value. 
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - float input_value = input_data[Offset(input_dims, in_channel, - in_x, in_y, batch)]; + float input_value = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; float filter_value = - filter_data[Offset(filter_dims, in_channel, filter_x, - filter_y, out_channel)]; + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; total += (input_value * filter_value); } } @@ -214,9 +224,9 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, } float bias_value = 0.0f; if (bias_data) { - bias_value = bias_data[Offset(bias_dims, out_channel, 0, 0, 0)]; + bias_value = bias_data[out_channel]; } - output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] = + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = ActivationFunctionWithMinMax(total + bias_value, output_activation_min, output_activation_max); @@ -226,6 +236,35 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims, + float* im2col_data, const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), + filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. template void Conv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, @@ -243,6 +282,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims, im2col_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // legacy, for compatibility with old checked-in code template void Conv(const float* input_data, const Dims<4>& input_dims, @@ -259,6 +299,7 @@ void Conv(const float* input_data, const Dims<4>& input_dims, im2col_data, im2col_dims); } +// TODO(b/80418076): Move to legacy ops file, update invocations. 
// legacy, for compatibility with old checked-in code template void Conv(const float* input_data, const Dims<4>& input_dims, @@ -272,31 +313,45 @@ void Conv(const float* input_data, const Dims<4>& input_dims, output_dims, im2col_data, im2col_dims); } -inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - int32 output_offset, int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims, - uint8* im2col_data, const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { +inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + uint8* output_data, const RuntimeShape& im2col_shape, + uint8* im2col_data, gemmlowp::GemmContext* gemm_context) { (void)im2col_data; // only used in optimized code. - (void)im2col_dims; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. (void)gemm_context; // only used in optimized code. 
+ const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int32 input_offset = params.input_offset; + const int32 filter_offset = params.weights_offset; + const int32 output_offset = params.output_offset; + const int32 output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); - const int output_depth = - MatchingArraySize(filter_dims, 3, bias_dims, 0, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); + + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int 
filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); for (int batch = 0; batch < batches; ++batch) { for (int out_y = 0; out_y < output_height; ++out_y) { for (int out_x = 0; out_x < output_width; ++out_x) { @@ -314,11 +369,11 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, // use zero as a default value. if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - int32 input_val = input_data[Offset(input_dims, in_channel, - in_x, in_y, batch)]; + int32 input_val = input_data[Offset(input_shape, batch, in_y, + in_x, in_channel)]; int32 filter_val = - filter_data[Offset(filter_dims, in_channel, filter_x, - filter_y, out_channel)]; + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; acc += (filter_val + filter_offset) * (input_val + input_offset); } @@ -326,14 +381,14 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, } } if (bias_data) { - acc += bias_data[Offset(bias_dims, out_channel, 0, 0, 0)]; + acc += bias_data[out_channel]; } acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, kReverseShift * output_shift); acc += output_offset; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); - output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] = + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast(acc); } } @@ -341,6 +396,43 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
+inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, + uint8* im2col_data, const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), + filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data, gemm_context); +} + +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. 
inline void Conv(const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, @@ -359,6 +451,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, im2col_data, im2col_dims, gemm_context); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // legacy, for compatibility with old checked-in code template inline void Conv(const uint8* input_data, const Dims<4>& input_dims, @@ -388,6 +481,7 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims, im2col_data, im2col_dims, gemm_context); } +// TODO(b/80418076): Move to legacy ops file, update invocations. // legacy, for compatibility with old checked-in code template void Conv(const uint8* input_data, const Dims<4>& input_dims, @@ -4661,21 +4755,30 @@ void Transpose(const T* input, const Dims<4>& input_dims, T* output, output); } -inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, float* output_data, - const Dims<4>& output_dims, float* /*im2col_data*/, - const Dims<4>& /*im2col_dims*/) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); - const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); +inline void TransposeConv( + const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& 
output_shape, + float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + (void)im2col_data; // only used in optimized code. + (void)im2col_shape; // only used in optimized code. + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); // Although transpose convolution simplifies to convolution with transposed // weights for strides of 1, non-unitary striding complicates matters. To @@ -4684,7 +4787,7 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, // computing their influence on the output, rather than looping through the // output elements in the typical "gather" access pattern of a conv. We // therefore must initialize the output array to zero. 
- const int num_elements = FlatSize(output_dims); + const int num_elements = output_shape.FlatSize(); for (int i = 0; i < num_elements; i++) { output_data[i] = 0.0f; } @@ -4707,13 +4810,14 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, // We cannot accumulate out of bounds if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && (out_y < output_height)) { - float input_value = input_data[Offset(input_dims, in_channel, - in_x, in_y, batch)]; + float input_value = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; float filter_value = - filter_data[Offset(filter_dims, in_channel, filter_x, - filter_y, out_channel)]; - output_data[Offset(output_dims, out_channel, out_x, out_y, - batch)] += input_value * filter_value; + filter_data[Offset(filter_shape, out_channel, filter_y, + filter_x, in_channel)]; + output_data[Offset(output_shape, batch, out_y, out_x, + out_channel)] += + input_value * filter_value; } } } @@ -4724,6 +4828,27 @@ inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, } } +// TODO(b/80418076): Move to legacy ops file, update invocations. +// Legacy. +inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + + TransposeConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + template inline bool EqualFn(T lhs, T rhs) { return lhs == rhs; diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index ac4626bc30..b70a87d0dc 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -179,12 +179,15 @@ class RuntimeShape { dims_[i] = val; } } + inline int32* DimsData() { return size_ > kMaxSmallSize ? dims_pointer_ : dims_; } inline const int32* DimsData() const { return size_ > kMaxSmallSize ? dims_pointer_ : dims_; } + // The caller must ensure that the shape is no bigger than 4-D. 
+ inline const int32* DimsDataUpTo4D() const { return dims_; } inline void Resize(int dimensions_count) { if (size_ > kMaxSmallSize) { @@ -346,11 +349,12 @@ inline size_t ReducedOutputOffset(const int num_dims, const int* dims, } inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) { - TFLITE_DCHECK(i0 >= 0 && i0 < shape.Dims(0)); - TFLITE_DCHECK(i1 >= 0 && i1 < shape.Dims(1)); - TFLITE_DCHECK(i2 >= 0 && i2 < shape.Dims(2)); - TFLITE_DCHECK(i3 >= 0 && i3 < shape.Dims(3)); - const int* dims_data = shape.DimsData(); + TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4); + const int* dims_data = shape.DimsDataUpTo4D(); + TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]); + TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]); + TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]); + TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]); return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3; } -- GitLab From 86b8f034e4d3d3d12d1e9d1b94170b271491bed3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 18 Sep 2018 16:39:12 -0700 Subject: [PATCH 0345/1357] Reject RESHAPE if new_shape tensor is not provided. 
PiperOrigin-RevId: 213541006 --- tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc | 2 +- tensorflow/contrib/lite/nnapi_delegate.cc | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc index c6587b3d3f..d85e576284 100644 --- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc @@ -518,7 +518,7 @@ class NNAPIDelegateKernel { } break; case kTfLiteBuiltinReshape: - if (version == 1) { + if (version == 1 && node->inputs->size == 2) { return [](const NNAPIOpMappingArgs& mapping_args) -> ANeuralNetworksOperationType { return ANEURALNETWORKS_RESHAPE; diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index a1c7434599..f23a0ccb80 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -512,6 +512,10 @@ TfLiteStatus AddOpsAndParams( nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED; break; case tflite::BuiltinOperator_RESHAPE: + if (node.inputs->size != 2) { + logError("NNAPI only supports 2-input RESHAPE"); + return kTfLiteError; + } nn_op_type = ANEURALNETWORKS_RESHAPE; // add_reshape_params(node.builtin_data); break; -- GitLab From b9e6bbc95bcffa481d29e31b448a03a91ba17eac Mon Sep 17 00:00:00 2001 From: Goutham Bhat Date: Tue, 18 Sep 2018 16:44:59 -0700 Subject: [PATCH 0346/1357] Return OrderedDict as eval results should be sorted by global_step key. 
PiperOrigin-RevId: 213541935 --- .../contrib/estimator/python/estimator/early_stopping.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/estimator/python/estimator/early_stopping.py b/tensorflow/contrib/estimator/python/estimator/early_stopping.py index 3eab21d5ac..e6e25e319f 100644 --- a/tensorflow/contrib/estimator/python/estimator/early_stopping.py +++ b/tensorflow/contrib/estimator/python/estimator/early_stopping.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import operator import os @@ -306,7 +307,8 @@ def read_eval_metrics(eval_dir): metrics[value.tag] = value.simple_value if metrics: eval_metrics_dict[event.step] = metrics - return eval_metrics_dict + return collections.OrderedDict( + sorted(eval_metrics_dict.items(), key=lambda t: t[0])) def _stop_if_threshold_crossed_hook(estimator, metric_name, threshold, -- GitLab From 93b5dea9663c00d3bb06348143b50b73b6fbacfb Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 18 Sep 2018 16:58:32 -0700 Subject: [PATCH 0347/1357] Add ConstantScalar, WithPredicate, Disjunction, and OpAnyOrder (where Op is a commutative binary operator) to the XLA pattern matcher. 
PiperOrigin-RevId: 213543953 --- tensorflow/compiler/xla/service/BUILD | 3 + .../compiler/xla/service/pattern_matcher.h | 143 +++++++++++++++++- .../xla/service/pattern_matcher_test.cc | 84 ++++++++++ 3 files changed, 222 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index fb80c78f68..68bf56c1b1 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -365,8 +365,11 @@ cc_library( hdrs = ["pattern_matcher.h"], deps = [ ":hlo", + ":hlo_casting_utils", + "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "@com_google_absl//absl/strings", + "@com_google_absl//absl/utility", ], ) diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index 4869db79e7..7d4d62ecb9 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -17,8 +17,12 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_PATTERN_MATCHER_H_ #include "absl/strings/string_view.h" +#include "absl/utility/utility.h" #include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -228,8 +232,46 @@ class LayoutPattern { LayoutType** matched_layout_; }; +template +class AnyOfPattern { + public: + explicit AnyOfPattern(const Patterns&... patterns) : patterns_(patterns...) 
{} + + bool Match(const Item* item) const { + return MatchImpl(item, std::integral_constant()); + } + + bool Match(Item* item) const { + return MatchImpl(item, std::integral_constant()); + } + + private: + template + bool MatchImpl(ItemType* item, std::integral_constant) const { + return std::get(patterns_).Match(item) || + MatchImpl(item, std::integral_constant()); + } + + template + bool MatchImpl(ItemType* item, + std::integral_constant) const { + return false; + } + + std::tuple patterns_; +}; } // namespace detail +// Returns a pattern that represents the logical disjunction of the input +// patterns. The returned pattern matches from left to right, and stops on the +// first match. +template +detail::AnyOfPattern::type, Patterns...> AnyOf( + const Patterns&... patterns) { + return detail::AnyOfPattern::type, + Patterns...>(patterns...); +} + // Creates a layout pattern that will capture the matched layout in the // argument. inline constexpr detail::LayoutPattern +class HloPredicatePatternImpl { + public: + explicit HloPredicatePatternImpl(const Previous& previous, Predicate pred) + : previous_(previous), pred_(std::move(pred)) {} + + bool Match(const ItemType* item) const { + return previous_.Match(item) && pred_(item); + } + + bool Match(ItemType* item) const { + return previous_.Match(item) && pred_(item); + } + + private: + Previous previous_; + Predicate pred_; +}; + +struct PatternFriend; + // A pattern that matches HloInstructions. 
template class HloInstructionPattern { @@ -879,6 +942,21 @@ class HloInstructionPattern { } private: + template + constexpr HloInstructionPattern< + HloInstructionType, + HloPredicatePatternImpl< + Impl, typename std::remove_const::type, + Predicate>> + WithPredicate(Predicate pred) const { + using NewImplType = HloPredicatePatternImpl< + Impl, typename std::remove_const::type, Predicate>; + return HloInstructionPattern( + NewImplType(impl_, std::move(pred)), matched_inst_); + } + + friend struct PatternFriend; + Impl impl_; HloInstructionType** matched_inst_; }; @@ -1005,31 +1083,50 @@ XLA_UNOP_PATTERN(Transpose) .WithOperand(0, std::forward(lhs)) \ .WithOperand(1, std::forward(rhs)); \ } -XLA_BINOP_PATTERN(Add) + +#define XLA_COMMUTATIVE_BINOP_PATTERN(NAME) \ + XLA_BINOP_PATTERN(NAME) \ + \ + template \ + inline auto NAME##AnyOrder(Lhs&& lhs, Rhs&& rhs) \ + ->decltype(AnyOf(NAME(lhs, rhs), NAME(rhs, lhs))) { \ + return AnyOf(NAME(lhs, rhs), NAME(rhs, lhs)); \ + } \ + \ + template \ + inline auto NAME##AnyOrder(HloInstructionType** matched_inst, Lhs&& lhs, \ + Rhs&& rhs) \ + ->decltype(AnyOf(NAME(matched_inst, lhs, rhs), \ + NAME(matched_inst, rhs, lhs))) { \ + return AnyOf(NAME(matched_inst, lhs, rhs), \ + NAME(matched_inst, rhs, lhs)); \ + } +XLA_COMMUTATIVE_BINOP_PATTERN(Add) XLA_BINOP_PATTERN(Atan2) XLA_BINOP_PATTERN(Divide) XLA_BINOP_PATTERN(Complex) XLA_BINOP_PATTERN(Dot) -XLA_BINOP_PATTERN(Eq) +XLA_COMMUTATIVE_BINOP_PATTERN(Eq) XLA_BINOP_PATTERN(Gather) XLA_BINOP_PATTERN(Ge) XLA_BINOP_PATTERN(Gt) XLA_BINOP_PATTERN(Le) XLA_BINOP_PATTERN(Lt) -XLA_BINOP_PATTERN(Maximum) -XLA_BINOP_PATTERN(Minimum) -XLA_BINOP_PATTERN(Multiply) -XLA_BINOP_PATTERN(Ne) +XLA_COMMUTATIVE_BINOP_PATTERN(Maximum) +XLA_COMMUTATIVE_BINOP_PATTERN(Minimum) +XLA_COMMUTATIVE_BINOP_PATTERN(Multiply) +XLA_COMMUTATIVE_BINOP_PATTERN(Ne) XLA_BINOP_PATTERN(Outfeed) XLA_BINOP_PATTERN(Power) XLA_BINOP_PATTERN(Remainder) XLA_BINOP_PATTERN(Send) XLA_BINOP_PATTERN(Subtract) -XLA_BINOP_PATTERN(And) 
-XLA_BINOP_PATTERN(Or) +XLA_COMMUTATIVE_BINOP_PATTERN(And) +XLA_COMMUTATIVE_BINOP_PATTERN(Or) XLA_BINOP_PATTERN(ShiftLeft) XLA_BINOP_PATTERN(ShiftRightArithmetic) XLA_BINOP_PATTERN(ShiftRightLogical) +#undef XLA_COMMUTATIVE_BINOP_PATTERN #undef XLA_BINOP_PATTERN // Helpers for ternary instructions. @@ -1070,6 +1167,30 @@ XLA_TERNOP_PATTERN(Clamp); XLA_TERNOP_PATTERN(Select); #undef XLA_TERNOP_PATTERN +namespace detail { +struct PatternFriend { + template + static auto ConstantScalar(T constant) -> decltype( + Constant() + .WithShape(match::Shape().IsScalar()) + .WithPredicate( + std::declval>())) { + std::function pred = + [constant](const HloInstruction* instr) { + const auto& literal = Cast(instr)->literal(); + auto status_or_const = LiteralUtil::CreateR0(constant).Convert( + literal.shape().element_type()); + return status_or_const.ok() && + literal == status_or_const.ConsumeValueOrDie(); + }; + + return Constant() + .WithShape(match::Shape().IsScalar()) + .WithPredicate(std::move(pred)); + } +}; +} // namespace detail + // Helpers for matching non-constant instructions. 
inline auto NonConstant() -> decltype(Op().IsNonConstant()) { return Op().IsNonConstant(); @@ -1107,6 +1228,12 @@ inline auto GetTupleElement(HloInstructionType** matched_inst, Arg&& arg, .WithTupleIndex(tuple_index); } +template +inline auto ConstantScalar(T constant) + -> decltype(detail::PatternFriend::ConstantScalar(constant)) { + return detail::PatternFriend::ConstantScalar(constant); +} + } // namespace match } // namespace xla diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc index a530581c34..b3a2c954b3 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc +++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc @@ -211,5 +211,89 @@ TEST(PatternMatcherTest, GetTupleElement) { EXPECT_TRUE(Match(root, match::GetTupleElement(match::Op(), 1))); } +TEST(PatternMatcherTest, AnyOf) { + constexpr char kModuleStr[] = R"( + HloModule test_module ENTRY test { ROOT constant = f16[] constant(1) })"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr)); + auto* root = hlo_module->entry_computation()->root_instruction(); + + EXPECT_TRUE( + Match(root, match::AnyOf(match::ConstantScalar(0), + match::ConstantScalar(1)))); + EXPECT_TRUE( + Match(root, match::AnyOf(match::ConstantScalar(1), + match::ConstantScalar(0)))); + EXPECT_FALSE( + Match(root, match::AnyOf(match::ConstantScalar(0), + match::ConstantScalar(2)))); +} + +TEST(PatternMatcherTest, ConstantScalar) { + constexpr char kModuleStr[] = R"( + HloModule test_module ENTRY test { ROOT constant = f16[] constant(42) })"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr)); + auto* root = hlo_module->entry_computation()->root_instruction(); + + EXPECT_TRUE(Match(root, match::ConstantScalar(42))); + EXPECT_FALSE(Match(root, match::ConstantScalar(41))); + EXPECT_FALSE(Match(root, match::ConstantScalar(0))); +} + +TEST(PatternMatcherTest, MultiplyAnyOrder) { + using match::ConstantScalar; + 
using match::MultiplyAnyOrder; + + constexpr char kModuleStr[] = R"( + HloModule test_module + ENTRY test { + lhs = f16[] constant(42) + rhs = f16[] constant(52) + ROOT multiply = f16[] multiply(lhs, rhs) + })"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr)); + auto* root = hlo_module->entry_computation()->root_instruction(); + const HloInstruction* instr; + + EXPECT_TRUE(Match( + root, MultiplyAnyOrder(&instr, ConstantScalar(42), ConstantScalar(52)))); + EXPECT_TRUE(Match( + root, MultiplyAnyOrder(&instr, ConstantScalar(52), ConstantScalar(42)))); +} + +TEST(PatternMatcherTest, AnyOfShortCircuit) { + using match::AnyOf; + using match::Multiply; + using match::Op; + + constexpr char kModuleStr[] = R"( + HloModule test_module + ENTRY test { + lhs = f16[] constant(42) + rhs = f16[] constant(52) + ROOT multiply = f16[] multiply(lhs, rhs) + })"; + TF_ASSERT_OK_AND_ASSIGN(auto hlo_module, ParseHloString(kModuleStr)); + auto* root = hlo_module->entry_computation()->root_instruction(); + + { + const HloInstruction* mul = nullptr; + const HloInstruction* any = nullptr; + + ASSERT_TRUE(Match( + root, AnyOf(Multiply(&mul, Op(), Op()), Op(&any)))); + EXPECT_NE(nullptr, mul); + EXPECT_EQ(nullptr, any); + } + { + const HloInstruction* mul = nullptr; + const HloInstruction* any = nullptr; + + ASSERT_TRUE(Match( + root, AnyOf(Op(&any), Multiply(&mul, Op(), Op())))); + EXPECT_NE(nullptr, any); + EXPECT_EQ(nullptr, mul); + } +} + } // namespace } // namespace xla -- GitLab From 867449616aa43f9306247cebdd1edac85b70852a Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 18 Sep 2018 17:22:53 -0700 Subject: [PATCH 0348/1357] Convert the new metric instances to (value_op, update_op) tuple in the EstimatorSpec. 
PiperOrigin-RevId: 213548081 --- tensorflow/python/estimator/estimator.py | 14 ++------------ tensorflow/python/estimator/model_fn.py | 2 ++ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index ffe1e30da0..2dc5d099a0 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -41,7 +41,6 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import tensor_util -from tensorflow.python.keras import metrics from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import metrics as metrics_lib @@ -1792,18 +1791,9 @@ def _extract_metric_update_ops(eval_dict, distribution=None): value_ops = {} # Sort metrics lexicographically so graph is identical every time. for name, value in sorted(six.iteritems(eval_dict)): - if isinstance(value, metrics.Metric): - metric_result = value.result() - # We expect only one update op for every metric when there is no - # distribution strategy. 
- metric_update = value.updates if distribution else value.updates[0] - else: - metric_result = value[0] - metric_update = value[1] - - value_ops[name] = metric_result + value_ops[name] = value[0] update_ops.append( - distribution.group(metric_update) if distribution else metric_update) + distribution.group(value[1]) if distribution else value[1]) update_op = control_flow_ops.group(*update_ops) if update_ops else None return update_op, value_ops diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py index 0f26a5bba4..824789467d 100644 --- a/tensorflow/python/estimator/model_fn.py +++ b/tensorflow/python/estimator/model_fn.py @@ -308,6 +308,8 @@ class EstimatorSpec( for key, value in six.iteritems(eval_metric_ops): if isinstance(value, Metric): vars_to_add.update(value.variables) + # Convert Metric instances to (value_tensor, update_op) tuple. + eval_metric_ops[key] = (value.result(), value.updates[0]) # Remove variables that are in the local variables collection already. vars_to_add = vars_to_add.difference(local_vars) for v in vars_to_add: -- GitLab From 38d8f893e0ab8376cf97c40fde78002f31776c92 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 18 Sep 2018 17:34:53 -0700 Subject: [PATCH 0349/1357] Add a new function to load kernel libraries and library folders. 
PiperOrigin-RevId: 213549838 --- tensorflow/python/framework/load_library.py | 65 +++++++++++++++++++ .../tools/api/golden/v1/tensorflow.pbtxt | 4 ++ .../tools/api/golden/v2/tensorflow.pbtxt | 4 ++ 3 files changed, 73 insertions(+) diff --git a/tensorflow/python/framework/load_library.py b/tensorflow/python/framework/load_library.py index 535c6017f5..908a5f521e 100644 --- a/tensorflow/python/framework/load_library.py +++ b/tensorflow/python/framework/load_library.py @@ -18,14 +18,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import errno import hashlib import imp +import os +import platform import sys import threading # pylint: disable=unused-import from tensorflow.core.framework import op_def_pb2 from tensorflow.core.lib.core import error_codes_pb2 # pylint: disable=unused-import from tensorflow.python import pywrap_tensorflow as py_tf +from tensorflow.python.lib.io import file_io from tensorflow.python.util import compat from tensorflow.python.util.tf_export import tf_export @@ -98,3 +102,64 @@ def load_file_system_library(library_filename): RuntimeError: when unable to load the library. """ py_tf.TF_LoadLibrary(library_filename) + + +def _is_shared_object(filename): + """Check the file to see if it is a shared object, only using extension.""" + if platform.system() == 'Linux': + if filename.endswith('.so'): + return True + else: + index = filename.rfind('.so.') + if index == -1: + return False + else: + # A shared object with the API version in filename + return filename[index + 4].isdecimal() + elif platform.system() == 'Darwin': + return filename.endswith('.dylib') + elif platform.system() == 'Windows': + return filename.endswith('.dll') + else: + return False + + +@tf_export('load_library') +def load_library(library_location): + """Loads a TensorFlow plugin. + + "library_location" can be a path to a specific shared object, or a folder. 
+ If it is a folder, all sahred objects that are named "libtfkernel*" will be + loaded. When the library is loaded, kernels registered in the library via the + `REGISTER_*` macros are made available in the TensorFlow process. + + Args: + library_location: Path to the plugin or the folder of plugins. + Relative or absolute filesystem path to a dynamic library file or folder. + + Returns: + None + + Raises: + OSError: When the file to be loaded is not found. + RuntimeError: when unable to load the library. + """ + if file_io.file_exists(library_location): + if file_io.is_directory(library_location): + directory_contents = file_io.list_directory(library_location) + + kernel_libraries = [ + os.path.join(library_location, f) for f in directory_contents + if _is_shared_object(f)] + else: + kernel_libraries = [library_location] + + for lib in kernel_libraries: + py_tf.TF_LoadLibrary(lib) + + else: + raise OSError( + errno.ENOENT, + 'The file or folder to load kernel libraries from does not exist.', + library_location) + diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index dd9f7c49e0..14ab885c91 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1372,6 +1372,10 @@ tf_module { name: "load_file_system_library" argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_library" + argspec: "args=[\'library_location\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_op_library" argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 9332e16bf6..323d2fc519 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -1320,6 +1320,10 @@ tf_module { name: 
"load_file_system_library" argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "load_library" + argspec: "args=[\'library_location\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "load_op_library" argspec: "args=[\'library_filename\'], varargs=None, keywords=None, defaults=None" -- GitLab From dff19b5a8b36ddf4aa51ce978d97b63129a7fdeb Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 18 Sep 2018 17:50:43 -0700 Subject: [PATCH 0350/1357] Add layout information to logging. PiperOrigin-RevId: 213551652 --- .../compiler/xla/service/gpu/cudnn_convolution_runner.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc index 2a86ac265e..3310ee848e 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc @@ -92,9 +92,9 @@ Status RunCudnnConvolutionImpl(CudnnConvParams params, VLOG(3) << "tensor_ops_enabled: " << algorithm.algorithm().tensor_ops_enabled(); VLOG(3) << "Convolution kind: " << CudnnConvKindToString(kind); - VLOG(3) << "input shape: { " << ShapeUtil::HumanString(input_shape) << " }"; - VLOG(3) << "filter shape: { " << ShapeUtil::HumanString(filter_shape) << " }"; - VLOG(3) << "Output shape: { " << ShapeUtil::HumanString(output_shape) << " }"; + VLOG(3) << "input shape: " << ShapeUtil::HumanStringWithLayout(input_shape); + VLOG(3) << "filter shape: " << ShapeUtil::HumanStringWithLayout(filter_shape); + VLOG(3) << "Output shape: " << ShapeUtil::HumanStringWithLayout(output_shape); VLOG(3) << "Window: { " << window.ShortDebugString() << " }"; VLOG(3) << "Dim nums: { " << dnums.ShortDebugString() << " }"; -- GitLab From c2dc702159cfccb623b99daf2f9df875a1f3dbfd Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 18 Sep 2018 17:56:20 -0700 Subject: [PATCH 0351/1357] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 213552354 --- tensorflow/go/op/wrappers.go | 986 +++++++++++++++++++++++++++++++---- 1 file changed, 892 insertions(+), 94 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 322b35dd91..eb636dbf54 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -332,7 +332,7 @@ func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQua // Creates a new tensor by applying sparse `updates` to individual values or // slices within a tensor (initially zero for numeric, empty for string) of // the given `shape` according to indices. This operator is the inverse of the -// @{tf.gather_nd} operator which extracts values or slices from a given tensor. +// `tf.gather_nd` operator which extracts values or slices from a given tensor. // // If `indices` contains duplicates, then their updates are accumulated (summed). // @@ -1473,7 +1473,7 @@ type StridedSliceAttr func(optionalAttr) // // value: a bitmask where a bit i being 1 means to ignore the begin // value and instead use the largest interval possible. At runtime -// begin[i] will be replaced with `[0, n-1) if `stride[i] > 0` or +// begin[i] will be replaced with `[0, n-1)` if `stride[i] > 0` or // `[-1, n-1]` if `stride[i] < 0` // If not specified, defaults to 0 func StridedSliceBeginMask(value int64) StridedSliceAttr { @@ -1856,6 +1856,32 @@ func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_d return op.Output(0) } +// Ensures that the tensor's shape matches the expected shape. +// +// Raises an error if the input tensor's shape does not match the specified shape. +// Returns the input tensor otherwise. +// +// Arguments: +// input: A tensor, whose shape is to be validated. +// shape: The expected (possibly partially specified) shape of the input tensor. 
+// +// Returns A tensor with the same shape and contents as the input tensor or value. +func EnsureShape(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"shape": shape} + opspec := tf.OpSpec{ + Type: "EnsureShape", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // UniqueWithCountsV2Attr is an optional argument to UniqueWithCountsV2. type UniqueWithCountsV2Attr func(optionalAttr) @@ -2259,7 +2285,7 @@ func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Ou // // output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] // -// Whereas in @{tf.gather} `indices` defines slices into the first +// Whereas in `tf.gather` `indices` defines slices into the first // dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the // first `N` dimensions of `params`, where `N = indices.shape[-1]`. // @@ -2356,6 +2382,8 @@ func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Ou // output = [['b0', 'b1'], ['d0', 'c1']] // ``` // +// See also `tf.gather` and `tf.batch_gather`. +// // Arguments: // params: The tensor from which to gather values. // indices: Index tensor. @@ -2445,6 +2473,16 @@ func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...Gathe // [9, 9, 9]] // ``` // +// `tf.fill` differs from `tf.constant` in a few ways: +// +// * `tf.fill` only supports scalar contents, whereas `tf.constant` supports +// Tensor values. +// * `tf.fill` creates an Op in the computation graph that constructs the actual +// Tensor value at runtime. This is in contrast to `tf.constant` which embeds +// the entire Tensor into the graph with a `Const` node. +// * Because `tf.fill` evaluates at graph runtime, it supports dynamic shapes +// based on other runtime Tensors, unlike `tf.constant`. +// // Arguments: // dims: 1-D. 
Represents the shape of the output tensor. // value: 0-D (scalar). Value to fill the returned tensor. @@ -2858,6 +2896,25 @@ func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Returns a constant tensor on the host. Only for writing C++ tests. +// +// Arguments: +// value: Attr `value` is the tensor to return. +// +func HostConst(scope *Scope, value tf.Tensor, dtype tf.DataType) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"value": value, "dtype": dtype} + opspec := tf.OpSpec{ + Type: "HostConst", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Splits a tensor into `num_split` tensors along one dimension. // // Arguments: @@ -3377,6 +3434,204 @@ func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// Bucketize each feature based on bucket boundaries. +// +// An op that returns a list of float tensors, where each tensor represents the +// bucketized values for a single feature. +// +// Arguments: +// float_values: float; List of Rank 2 Tensor each containing float values for a single feature. +// bucket_boundaries: float; List of Rank 1 Tensors each containing the bucket boundaries for a single +// feature. +// +// Returns int; List of Rank 2 Tensors each containing the bucketized values for a single feature. 
+func BoostedTreesBucketize(scope *Scope, float_values []tf.Output, bucket_boundaries []tf.Output) (buckets []tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesBucketize", + Input: []tf.Input{ + tf.OutputList(float_values), tf.OutputList(bucket_boundaries), + }, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if buckets, idx, err = makeOutputList(op, idx, "buckets"); err != nil { + scope.UpdateErr("BoostedTreesBucketize", err) + return + } + return buckets +} + +// BoostedTreesQuantileStreamResourceFlushAttr is an optional argument to BoostedTreesQuantileStreamResourceFlush. +type BoostedTreesQuantileStreamResourceFlushAttr func(optionalAttr) + +// BoostedTreesQuantileStreamResourceFlushGenerateQuantiles sets the optional generate_quantiles attribute to value. +// +// value: bool; If True, the output will be the num_quantiles for each stream where the ith +// entry is the ith quantile of the input with an approximation error of epsilon. +// Duplicate values may be present. +// If False, the output will be the points in the histogram that we got which roughly +// translates to 1/epsilon boundaries and without any duplicates. +// Default to False. +// If not specified, defaults to false +func BoostedTreesQuantileStreamResourceFlushGenerateQuantiles(value bool) BoostedTreesQuantileStreamResourceFlushAttr { + return func(m optionalAttr) { + m["generate_quantiles"] = value + } +} + +// Flush the summaries for a quantile stream resource. +// +// An op that flushes the summaries for a quantile stream resource. +// +// Arguments: +// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. +// num_buckets: int; approximate number of buckets unless using generate_quantiles. +// +// Returns the created operation. 
+func BoostedTreesQuantileStreamResourceFlush(scope *Scope, quantile_stream_resource_handle tf.Output, num_buckets tf.Output, optional ...BoostedTreesQuantileStreamResourceFlushAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "BoostedTreesQuantileStreamResourceFlush", + Input: []tf.Input{ + quantile_stream_resource_handle, num_buckets, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Add the quantile summaries to each quantile stream resource. +// +// An op that adds a list of quantile summaries to a quantile stream resource. Each +// summary Tensor is rank 2, containing summaries (value, weight, min_rank, max_rank) +// for a single feature. +// +// Arguments: +// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. +// summaries: string; List of Rank 2 Tensor each containing the summaries for a single feature. +// +// Returns the created operation. +func BoostedTreesQuantileStreamResourceAddSummaries(scope *Scope, quantile_stream_resource_handle tf.Output, summaries []tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesQuantileStreamResourceAddSummaries", + Input: []tf.Input{ + quantile_stream_resource_handle, tf.OutputList(summaries), + }, + } + return scope.AddOperation(opspec) +} + +// Makes the summary of quantiles for the batch. +// +// An op that takes a list of tensors and outputs the quantile summaries for each tensor. +// +// Arguments: +// float_values: float; List of Rank 2 Tensors each containing values for a single feature. +// example_weights: float; Rank 1 Tensor with weights per instance. +// epsilon: float; The required maximum approximation error. +// +// Returns float; List of Rank 2 Tensors each containing the quantile summary (value, weight, +// min_rank, max_rank) of a single feature. 
+func BoostedTreesMakeQuantileSummaries(scope *Scope, float_values []tf.Output, example_weights tf.Output, epsilon tf.Output) (summaries []tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesMakeQuantileSummaries", + Input: []tf.Input{ + tf.OutputList(float_values), example_weights, epsilon, + }, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if summaries, idx, err = makeOutputList(op, idx, "summaries"); err != nil { + scope.UpdateErr("BoostedTreesMakeQuantileSummaries", err) + return + } + return summaries +} + +// BoostedTreesCreateQuantileStreamResourceAttr is an optional argument to BoostedTreesCreateQuantileStreamResource. +type BoostedTreesCreateQuantileStreamResourceAttr func(optionalAttr) + +// BoostedTreesCreateQuantileStreamResourceMaxElements sets the optional max_elements attribute to value. +// +// value: int; The maximum number of data points that can be fed to the stream. +// If not specified, defaults to 1099511627776 +func BoostedTreesCreateQuantileStreamResourceMaxElements(value int64) BoostedTreesCreateQuantileStreamResourceAttr { + return func(m optionalAttr) { + m["max_elements"] = value + } +} + +// Create the Resource for Quantile Streams. +// +// Arguments: +// quantile_stream_resource_handle: resource; Handle to quantile stream resource. +// epsilon: float; The required approximation error of the stream resource. +// num_streams: int; The number of streams managed by the resource that shares the same epsilon. +// +// Returns the created operation. 
+func BoostedTreesCreateQuantileStreamResource(scope *Scope, quantile_stream_resource_handle tf.Output, epsilon tf.Output, num_streams tf.Output, optional ...BoostedTreesCreateQuantileStreamResourceAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "BoostedTreesCreateQuantileStreamResource", + Input: []tf.Input{ + quantile_stream_resource_handle, epsilon, num_streams, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Checks whether a quantile stream has been initialized. +// +// An Op that checks if quantile stream resource is initialized. +// +// Arguments: +// quantile_stream_resource_handle: resource; The reference to quantile stream resource handle. +// +// Returns bool; True if the resource is initialized, False otherwise. +func IsBoostedTreesQuantileStreamResourceInitialized(scope *Scope, quantile_stream_resource_handle tf.Output) (is_initialized tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IsBoostedTreesQuantileStreamResourceInitialized", + Input: []tf.Input{ + quantile_stream_resource_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Calculates the prior from the training data (the bias) and fills in the first node with the logits' prior. Returns a boolean indicating whether to continue centering. // // Arguments: @@ -3491,8 +3746,9 @@ func BoostedTreesExampleDebugOutputs(scope *Scope, tree_ensemble_handle tf.Outpu // Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is // misisng, the `output` tensor at that position will be zeroed. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. 
// // For example: // @@ -3601,8 +3857,9 @@ func Asin(scope *Scope, x tf.Output) (y tf.Output) { // Computes the sum along sparse segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first // dimension, selecting a subset of dimension 0, specified by `indices`. @@ -3668,28 +3925,32 @@ func Sinh(scope *Scope, x tf.Output) (y tf.Output) { // Computes the minimum along segments of a tensor. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#segmentation) +// for an explanation of segments. // // This operator is similar to the unsorted segment sum operator found // [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). // Instead of computing the sum over segments, it computes the minimum such that: // -// \\(output_i = \min_j data_j\\) where min is over `j` such -// that `segment_ids[j] == i`. +// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such +// that `segment_ids[j...] == i`. // // If the minimum is empty for a given segment ID `i`, it outputs the largest // possible value for the specific numeric type, // `output[i] = numeric_limits::max()`. // +// If the given segment ID `i` is negative, then the corresponding value is +// dropped, and will not be included in the result. +// // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. +// segment_ids: A tensor whose shape is a prefix of `data.shape`. // // -// Returns Has same shape as data, except for dimension 0 which -// has size `num_segments`. 
+// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return @@ -3721,11 +3982,12 @@ func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { // Computes the sum along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Computes a tensor such that -// \\(output[i] = sum_{j...} data[j...]\\) where the sum is over tuples `j...` such +// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such // that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` // need not be sorted and need not cover all values in the full // range of valid values. @@ -4353,32 +4615,39 @@ func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthEleme // Computes the maximum along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // This operator is similar to the unsorted segment sum operator found // [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). // Instead of computing the sum over segments, it computes the maximum such that: // -// \\(output_i = \max_j data_j\\) where max is over `j` such -// that `segment_ids[j] == i`. +// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such +// that `segment_ids[j...] == i`. 
// // If the maximum is empty for a given segment ID `i`, it outputs the smallest // possible value for the specific numeric type, // `output[i] = numeric_limits::lowest()`. // +// If the given segment ID `i` is negative, then the corresponding value is +// dropped, and will not be included in the result. +// //
// //
// // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. -// +// segment_ids: A tensor whose shape is a prefix of `data.shape`.END +// } +// out_arg { +// name: "output" +// description: <= 0 +func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Ncontext_sparse"] = value + } +} + +// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Ncontext_dense"] = value + } +} + +// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Nfeature_list_sparse"] = value + } +} + +// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Nfeature_list_dense"] = value + } +} + +// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. +// +// value: A list of Ncontext_sparse types; the data types of data in +// each context Feature given in context_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). 
+// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["context_sparse_types"] = value + } +} + +// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["feature_list_dense_types"] = value + } +} + +// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. +// +// value: A list of Ncontext_dense shapes; the shapes of data in +// each context Feature given in context_dense_keys. +// The number of elements in the Feature corresponding to context_dense_key[j] +// must always equal context_dense_shapes[j].NumEntries(). +// The shape of context_dense_values[j] will match context_dense_shapes[j]. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["context_dense_shapes"] = value + } +} + +// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. +// +// value: A list of Nfeature_list_sparse types; the data types +// of data in each FeatureList given in feature_list_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). 
+// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["feature_list_sparse_types"] = value + } +} + +// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. +// +// value: A list of Nfeature_list_dense shapes; the shapes of +// data in each FeatureList given in feature_list_dense_keys. +// The shape of each Feature in the FeatureList corresponding to +// feature_list_dense_key[j] must always equal +// feature_list_dense_shapes[j].NumEntries(). +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["feature_list_dense_shapes"] = value + } +} + +// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors. +// +// Arguments: +// serialized: A vector containing binary serialized SequenceExample protos. +// debug_name: A vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) name for the +// corresponding serialized proto. This is purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no name is available. +// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). +// context_dense_defaults[j] provides default values +// when the SequenceExample's context map lacks context_dense_key[j]. +// If an empty Tensor is provided for context_dense_defaults[j], +// then the Feature context_dense_keys[j] is required. +// The input type is inferred from context_dense_defaults[j], even when it's +// empty. If context_dense_defaults[j] is not empty, its shape must match +// context_dense_shapes[j]. 
+// feature_list_dense_missing_assumed_empty: A vector listing the +// FeatureList keys which may be missing from the SequenceExamples. If the +// associated FeatureList is missing, it is treated as empty. By default, +// any FeatureList not listed in this vector must exist in the SequenceExamples. +// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). +// The keys expected in the Examples' features associated with context_sparse +// values. +// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' context features associated with +// dense values. +// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors +// (scalars). The keys expected in the FeatureLists associated with sparse +// values. +// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' feature_lists associated +// with lists of dense values. +func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, 
"feature_list_dense_keys": feature_list_dense_keys} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QueueDequeueV2", + Type: "ParseSequenceExample", Input: []tf.Input{ - handle, + serialized, debug_name, tf.OutputList(context_dense_defaults), }, Attrs: attrs, } @@ -7903,11 +8389,43 @@ func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataTyp } var idx int var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueV2", err) + if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) return } - return components + if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil 
{ + scope.UpdateErr("ParseSequenceExample", err) + return + } + return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths } // Computes the Gauss error function of `x` element-wise. @@ -9188,6 +9706,49 @@ func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64 return op.Output(0) } +// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace. +type StaticRegexReplaceAttr func(optionalAttr) + +// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value. +// +// value: If True, the replacement is global, otherwise the replacement +// is done only on the first match. +// If not specified, defaults to true +func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr { + return func(m optionalAttr) { + m["replace_global"] = value + } +} + +// Replaces the match of pattern in input with rewrite. +// +// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// +// Arguments: +// input: The text to be processed. +// pattern: The regular expression to match the input. +// rewrite: The rewrite to be applied to the matched expresion. +// +// Returns The text after applying pattern and rewrite. +func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StaticRegexReplace", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes gradients for the exponential linear (Elu) operation. 
// // Arguments: @@ -10054,7 +10615,7 @@ func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr { // // [1, 12, 3, 14, 14, 6, 7, 20] // -// See @{tf.scatter_nd} for more details about how to make updates to +// See `tf.scatter_nd` for more details about how to make updates to // slices. // // Arguments: @@ -11397,6 +11958,29 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu return op.Output(0), op.Output(1) } +// String lengths of `input`. +// +// Computes the length of each string given in the input tensor. +// +// Arguments: +// input: The string for which to compute the length. +// +// Returns Integer tensor that has the same shape as `input`. The output contains the +// element-wise string lengths of `input`. +func StringLength(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "StringLength", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Converts each string in the input Tensor to its hash mod by a number of buckets. // // The hash function is deterministic on the content of the string within the @@ -11747,7 +12331,7 @@ func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { // // [1, 11, 3, 10, 9, 6, 7, 12] // -// See @{tf.scatter_nd} for more details about how to make updates to +// See `tf.scatter_nd` for more details about how to make updates to // slices. // // Arguments: @@ -12232,8 +12816,9 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { // Computes the mean along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. 
// // Computes a tensor such that // \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is @@ -12248,7 +12833,7 @@ func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { // // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s // first dimension. Values should be sorted and can be repeated. // // Returns Has same shape as data, except for dimension 0 which @@ -12367,7 +12952,7 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o // // Arguments: // input: A string tensor of the text to be processed. -// pattern: A 1-D string tensor of the regular expression to match the input. +// pattern: A scalar string tensor containing the regular expression to match the input. // // Returns A bool tensor with the same shape as `input`. func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) { @@ -14443,6 +15028,25 @@ func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf return scope.AddOperation(opspec) } +// Returns 0 if the denominator is zero. +// +// +// *NOTE*: `DivNoNan` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func DivNoNan(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DivNoNan", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the gradient for the sqrt of `x` wrt its input. // // Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` @@ -15350,6 +15954,36 @@ func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Out return op.Output(0) } +// Check if the input matches the regex pattern. +// +// The input is a string tensor of any shape. 
The pattern is the +// regular expression to be matched with every element of the input tensor. +// The boolean values (True or False) of the output tensor indicate +// if the input matches the regex pattern provided. +// +// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// +// Arguments: +// input: A string tensor of the text to be processed. +// pattern: The regular expression to match the input. +// +// Returns A bool tensor with the same shape as `input`. +func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"pattern": pattern} + opspec := tf.OpSpec{ + Type: "StaticRegexFullMatch", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent. type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr) @@ -15947,6 +16581,23 @@ func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator t return scope.AddOperation(opspec) } +// Creates a dataset containing elements of first component of `input_dataset` having true in the last component. +func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "FilterByLastComponentDataset", + Input: []tf.Input{ + input_dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams. 
type CudnnRNNCanonicalToParamsAttr func(optionalAttr) @@ -16806,7 +17457,8 @@ func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { // records: Each string is a record/row in the csv and all records should have // the same format. // record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or empty if the column is required. +// scalar default value for that column or an empty vector if the column is +// required. // // Returns Each tensor will have the same shape as records. func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { @@ -17573,8 +18225,9 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_ // Computes the sum along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Computes a tensor such that // \\(output_i = \sum_j data_j\\) where sum is over `j` such @@ -17588,7 +18241,7 @@ func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_ // // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s // first dimension. Values should be sorted and can be repeated. // // Returns Has same shape as data, except for dimension 0 which @@ -19505,8 +20158,9 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min // Computes the minimum along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. 
// // Computes a tensor such that // \\(output_i = \min_j(data_j)\\) where `min` is over `j` such @@ -19520,7 +20174,7 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min // // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s // first dimension. Values should be sorted and can be repeated. // // Returns Has same shape as data, except for dimension 0 which @@ -20266,27 +20920,31 @@ func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { // Computes the product along segments of a tensor. // -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#segmentation) +// for an explanation of segments. // // This operator is similar to the unsorted segment sum operator found // [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). // Instead of computing the sum over segments, it computes the product of all // entries belonging to a segment such that: // -// \\(output_i = \prod_j data_j\\) where the product is over `j` such -// that `segment_ids[j] == i`. +// \\(output_i = \prod_{j...} data[j...]\\) where the product is over tuples +// `j...` such that `segment_ids[j...] == i`. // // If there is no entry for a given segment ID `i`, it outputs 1. // +// If the given segment ID `i` is negative, then the corresponding value is +// dropped, and will not be included in the result. +// // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. +// segment_ids: A tensor whose shape is a prefix of `data.shape`. // // -// Returns Has same shape as data, except for dimension 0 which -// has size `num_segments`. 
+// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return @@ -20363,8 +21021,9 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf // Computes the mean along sparse segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first // dimension, selecting a subset of dimension 0, specified by `indices`. @@ -20433,8 +21092,9 @@ func Cosh(scope *Scope, x tf.Output) (y tf.Output) { // Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is // misisng, the `output` tensor at that position will be zeroed. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Arguments: // @@ -20579,8 +21239,9 @@ func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segm // // N is the size of the segment being reduced. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Arguments: // @@ -20638,8 +21299,9 @@ func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { // Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. 
If an id is // misisng, the `output` tensor at that position will be zeroed. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Arguments: // @@ -21000,8 +21662,9 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output // Computes the maximum along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Computes a tensor such that // \\(output_i = \max_j(data_j)\\) where `max` is over `j` such @@ -21015,7 +21678,7 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output // // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s // first dimension. Values should be sorted and can be repeated. // // Returns Has same shape as data, except for dimension 0 which @@ -23431,29 +24094,57 @@ func TensorListSetItem(scope *Scope, input_handle tf.Output, index tf.Output, it return op.Output(0) } -// Computes the matrix exponential of one or more square matrices: -// -// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead. +// Creates a Tensor by indexing into the TensorList. // -// \\(exp(A) = \sum_{n=0}^\infty A^n/n!\\) +// Each row in the produced Tensor corresponds to the element in the TensorList +// specified by the given index (see `tf.gather`). // -// The exponential is computed using a combination of the scaling and squaring -// method and the Pade approximation. Details can be founds in: -// Nicholas J. 
Higham, "The scaling and squaring method for the matrix exponential -// revisited," SIAM J. Matrix Anal. Applic., 26:1179-1193, 2005. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the exponential for all input submatrices `[..., :, :]`. +// input_handle: The input tensor list. +// indices: The indices used to index into the list. +// values: The tensor. +func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_dtype tf.DataType) (values tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + opspec := tf.OpSpec{ + Type: "TensorListGather", + Input: []tf.Input{ + input_handle, indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a TensorList by indexing into a Tensor. // -// Arguments: -// input: Shape is `[..., M, M]`. +// Each member of the TensorList corresponds to one row of the input tensor, +// specified by the given index (see `tf.gather`). // -// Returns Shape is `[..., M, M]`. +// tensor: The input tensor. +// indices: The indices used to index into the list. +// element_shape: The shape of the elements in the list (can be less specified than +// the shape of the tensor). +// output_handle: The TensorList. +func TensorListScatter(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListScatter", + Input: []tf.Input{ + tensor, indices, element_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deprecated, use python implementation tf.linalg.matrix_exponential. 
// -// @compatibility(scipy) -// Equivalent to scipy.linalg.expm -// @end_compatibility +// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead. func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return @@ -23959,8 +24650,9 @@ func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, // Computes the product along segments of a tensor. // -// Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of -// segments. +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. // // Computes a tensor such that // \\(output_i = \prod_j data_j\\) where the product is over `j` such @@ -23974,7 +24666,7 @@ func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, // // Arguments: // -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s // first dimension. Values should be sorted and can be repeated. // // Returns Has same shape as data, except for dimension 0 which @@ -24999,7 +25691,7 @@ func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr { // Update '*var' according to the Adam algorithm. // -// $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ +// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ // $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ // $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ // $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ @@ -27016,8 +27708,10 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source // If `len` defines a substring that would extend beyond the length of the input // string, then as many characters as possible are used. 
// -// If `pos` is negative or specifies a character index larger than any of the input -// strings, then an `InvalidArgumentError` is thrown. +// A negative `pos` indicates distance within the string backwards from the end. +// +// If `pos` specifies an index which is out of range for any of the input strings, +// then an `InvalidArgumentError` is thrown. // // `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on // Op creation. @@ -27643,6 +28337,8 @@ func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional .. // On GPU, if an out of bound index is found, a 0 is stored in the // corresponding output value. // +// See also `tf.batch_gather` and `tf.gather_nd`. +// // Arguments: // params: The tensor from which to gather values. Must be at least rank // `axis + 1`. @@ -28153,6 +28849,30 @@ func FFT(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Identity transformation that models performance. +// +// Identity transformation that models performance. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. +// +// +func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ModelDataset", + Input: []tf.Input{ + input_dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Performs a padding as a preprocess during a convolution. // // Similar to FusedResizeAndPadConv2d, this op allows for an optimized @@ -28842,10 +29562,16 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) ( // // Arguments: // -// window_size: A scalar representing the number of elements to accumulate in a window. 
+// size: A scalar representing the number of elements to accumulate in a window. +// shift: A scalar representing the steps moving the sliding window forward in one +// iteration. It must be positive. +// stride: A scalar representing the stride of the input elements of the sliding window. +// It must be positive. +// drop_remainder: A scalar representing whether a window should be dropped in case its size is +// smaller than desired. // // -func WindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } @@ -28853,7 +29579,7 @@ func WindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, opspec := tf.OpSpec{ Type: "WindowDataset", Input: []tf.Input{ - input_dataset, window_size, + input_dataset, size, shift, stride, drop_remainder, }, Attrs: attrs, } @@ -30063,7 +30789,7 @@ func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, st // // [1, 13, 3, 14, 14, 6, 7, 20] // -// See @{tf.scatter_nd} for more details about how to make updates to slices. +// See `tf.scatter_nd` for more details about how to make updates to slices. // // Arguments: // input: A Tensor. @@ -30680,6 +31406,41 @@ func MapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...MapIncomp return op.Output(0) } +// Generate the bucket boundaries for each feature based on accumulated summaries. +// +// An op that returns a list of float tensors for a quantile stream resource. Each +// tensor is Rank 1 containing bucket boundaries for a single feature. +// +// Arguments: +// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. 
+// num_features: inferred int; number of features to get bucket boundaries for. +// +// Returns float; List of Rank 1 Tensors each containing the bucket boundaries for a feature. +func BoostedTreesQuantileStreamResourceGetBucketBoundaries(scope *Scope, quantile_stream_resource_handle tf.Output, num_features int64) (bucket_boundaries []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_features": num_features} + opspec := tf.OpSpec{ + Type: "BoostedTreesQuantileStreamResourceGetBucketBoundaries", + Input: []tf.Input{ + quantile_stream_resource_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if bucket_boundaries, idx, err = makeOutputList(op, idx, "bucket_boundaries"); err != nil { + scope.UpdateErr("BoostedTreesQuantileStreamResourceGetBucketBoundaries", err) + return + } + return bucket_boundaries +} + // OrderedMapUnstageAttr is an optional argument to OrderedMapUnstage. type OrderedMapUnstageAttr func(optionalAttr) @@ -30751,6 +31512,43 @@ func OrderedMapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes [] return values } +// BoostedTreesQuantileStreamResourceHandleOpAttr is an optional argument to BoostedTreesQuantileStreamResourceHandleOp. +type BoostedTreesQuantileStreamResourceHandleOpAttr func(optionalAttr) + +// BoostedTreesQuantileStreamResourceHandleOpContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func BoostedTreesQuantileStreamResourceHandleOpContainer(value string) BoostedTreesQuantileStreamResourceHandleOpAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// BoostedTreesQuantileStreamResourceHandleOpSharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func BoostedTreesQuantileStreamResourceHandleOpSharedName(value string) BoostedTreesQuantileStreamResourceHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a BoostedTreesQuantileStreamResource. +func BoostedTreesQuantileStreamResourceHandleOp(scope *Scope, optional ...BoostedTreesQuantileStreamResourceHandleOpAttr) (resource tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "BoostedTreesQuantileStreamResourceHandleOp", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // OrderedMapSizeAttr is an optional argument to OrderedMapSize. type OrderedMapSizeAttr func(optionalAttr) -- GitLab From ff2e46cd768b9161235f10f6f8bbb23cb27314dc Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 18 Sep 2018 18:18:43 -0700 Subject: [PATCH 0352/1357] Update the grappler plugin to support the @defun generated function and ops. 
PiperOrigin-RevId: 213554813 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../experimental_implementation_selector.cc | 48 +++++++++++++------ ...perimental_implementation_selector_test.cc | 5 +- .../grappler/optimizers/meta_optimizer.cc | 15 +++++- tensorflow/python/eager/function_test.py | 46 ++++++++++++++++++ 5 files changed, 95 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index f094c151e6..029205248b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -515,6 +515,7 @@ cc_library( ":custom_graph_optimizer_registry", ":debug_stripper", ":dependency_optimizer", + ":experimental_implementation_selector", ":function_optimizer", ":graph_optimizer", ":layout_optimizer", diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc b/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc index eeea269fb0..2c36c9b7b3 100644 --- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc +++ b/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc @@ -32,8 +32,6 @@ limitations under the License. namespace tensorflow { namespace grappler { -REGISTER_GRAPH_OPTIMIZER(ExperimentalImplementationSelector); - Status ExperimentalImplementationSelector::LoadFunctions( const GraphDef& graph) { lib_info_.reset(new FunctionLibraryApiInfo); @@ -43,8 +41,20 @@ Status ExperimentalImplementationSelector::LoadFunctions( Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall( NodeDef* node_def) const { - const FunctionApiInfo* info = lib_info_->GetApiInfo(node_def->op()); - if (info == nullptr) { + // There are two ways of calling functions: + // 1. By specifying an op name as a function name, or + // 2. Via the @defun functional interface, where the real function name + // appear as the attribute with type func. 
+ std::vector function_attribute_names; + for (const auto& attr : node_def->attr()) { + if (attr.second.has_func() && + lib_info_->GetApiInfo(attr.second.func().name()) != nullptr) { + function_attribute_names.emplace_back(attr.first); + } + } + + if (function_attribute_names.empty() && + lib_info_->GetApiInfo(node_def->op()) == nullptr) { // A regular op, or a function which has no interface. return Status::OK(); } @@ -58,17 +68,25 @@ Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall( DeviceNameUtils::ParsedName parsed_name; DeviceNameUtils::ParseLocalName(device, &parsed_name); - string best_function_name; - lib_info_->GetBestImplementation(node_def->op(), parsed_name.type, - &best_function_name); - if (node_def->op() != best_function_name) { - // The current implementation is not the best, swap the op to the best one. - // There will be duplicates in the graph and they will be pruned by other - // grappler plugin since no other node is using their output as inputs. - // TODO(scottzhu): Update the tf.eager.defun to register functions without - // having to call them with input data. That will reduce the graph size and - // save the work for prune them. 
- node_def->set_op(best_function_name); + for (const auto& attr_name : function_attribute_names) { + string function_name = node_def->attr().at(attr_name).func().name(); + string best_function_name; + lib_info_->GetBestImplementation(function_name, parsed_name.type, + &best_function_name); + if (function_name != best_function_name) { + node_def->mutable_attr() + ->find(attr_name) + ->second.mutable_func() + ->set_name(best_function_name); + } + } + if (lib_info_->GetApiInfo(node_def->op()) != nullptr) { + string best_function_name; + lib_info_->GetBestImplementation(node_def->op(), parsed_name.type, + &best_function_name); + if (node_def->op() != best_function_name) { + node_def->set_op(best_function_name); + } } return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc b/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc index 2368e577c2..3f1ebefac6 100644 --- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc +++ b/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc @@ -45,9 +45,8 @@ TEST_F(ExperimentalImplementationSelectorTest, NoUpdate) { GrapplerItem item; CHECK(fake_input.NextItem(&item)); - std::unique_ptr optimizer = - CustomGraphOptimizerRegistry::CreateByNameOrNull( - "ExperimentalImplementationSelector"); + std::unique_ptr optimizer( + new ExperimentalImplementationSelector); ASSERT_NE(nullptr, optimizer); TF_ASSERT_OK(optimizer->Init()); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7ed4a67333..1ed1b22931 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/optimizers/debug_stripper.h" #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" +#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h" #include "tensorflow/core/grappler/optimizers/function_optimizer.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/optimizers/loop_optimizer.h" @@ -196,8 +197,18 @@ Status MetaOptimizer::InitializeOptimizersByName( Status MetaOptimizer::InitializeCustomGraphOptimizers( std::vector>* optimizers) const { for (const auto& optimizer_config : cfg_.custom_optimizers()) { - auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull( - optimizer_config.name()); + // Initialize the ExperimentalImplementationSelector here instead of + // CustomizeOptimizer registry, due the static link issue in TensorRT for + // double registry. + // TODO(laigd): Remove this hack and change it back to use the registry once + // the duplicate static import issue is fixed. 
+ std::unique_ptr custom_optimizer; + if (optimizer_config.name() == "ExperimentalImplementationSelector") { + custom_optimizer.reset(new ExperimentalImplementationSelector()); + } else { + custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull( + optimizer_config.name()); + } if (custom_optimizer) { VLOG(2) << "Registered custom configurable graph optimizer: " << optimizer_config.name(); diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 6326a5b45f..4a1bde3f5e 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -26,6 +26,7 @@ import weakref import numpy from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python import keras from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import backprop @@ -1729,6 +1730,51 @@ class FunctionTest(test.TestCase): 'be Tensors;.*'): graph_function('Not a Tensor.') + def testSwapImplementationWithGrapplerPlugin(self): + rewrites = rewriter_config_pb2.RewriterConfig() + # function_optimizer has to be turn off, otherwise it will delete the + # registered function if it does not get called. + # TODO(scottzhu): Move the ExperimentalImplementationSelector to be called + # before function_optimizer in future. 
+ rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF + customer_optimizer = rewrites.custom_optimizers.add() + customer_optimizer.name = 'ExperimentalImplementationSelector' + rewrites.min_graph_nodes = -1 + graph_options = config_pb2.GraphOptions( + rewrite_options=rewrites, build_cost_model=1) + config = config_pb2.ConfigProto(graph_options=graph_options) + + with context.graph_mode(), self.cached_session( + config=config, graph=ops.Graph(), use_gpu=True) as sess: + + @function.defun_with_attributes( + attributes={ + 'experimental_api_implements': 'random_boost', + 'experimental_api_preferred_device': 'CPU' + }) + def cpu_boost(x): + return math_ops.add(x, 2.0) + + @function.defun_with_attributes( + attributes={ + 'experimental_api_implements': 'random_boost', + 'experimental_api_preferred_device': 'GPU' + }) + def gpu_boost(x): + return math_ops.add(x, 4.0) + + x = constant_op.constant(1.0) + + function.register(cpu_boost, x) + y = gpu_boost(x) + y_value = sess.run(y) + + if test.is_gpu_available(): + self.assertEquals(y_value, 5.0) + else: + # Grappler fallback to use the CPU impl even called with GPU function. + self.assertEquals(y_value, 3.0) + @test_util.with_c_shapes class AutomaticControlDependenciesTest(test.TestCase): -- GitLab From 9ee75bb6e29007b8b5ea4a6d981996d8a4d88373 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 18 Sep 2018 18:31:37 -0700 Subject: [PATCH 0353/1357] [tf.data] Add a test for state persistence between iterators over the same MapDataset. 
PiperOrigin-RevId: 213555982 --- .../data/kernel_tests/map_dataset_op_test.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py index 2ab74beb32..ae04995436 100644 --- a/tensorflow/python/data/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/map_dataset_op_test.py @@ -397,6 +397,28 @@ class MapDatasetTest(test.TestCase, parameterized.TestCase): # Randomness is repeatable given same seed self.assertAllClose(random_values, random_values_2) + def testStatefulMapKeepsStateAcrossIterators(self): + iterator = (dataset_ops.Dataset.from_tensors(0).repeat(10) + .map(lambda _: random_ops.random_uniform((), seed=11)) + .repeat(1000) + .batch(10) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.cached_session() as sess: + sess.run(init_op) + random_values = sess.run(get_next) + + # Assert that one of the next 99 batches yielded by the iterator is + # different from the first. + i = 0 + while i < 99: + if np.any(random_values != sess.run(get_next)): + break + i += 1 + self.assertLess(i, 99) + def testMapDict(self): iterator = (dataset_ops.Dataset.range(10) .map(lambda x: {"foo": x * 2, "bar": x ** 2}) -- GitLab From 9fe177881224571aff0c267593f747f5fd7a2967 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 18 Sep 2018 19:39:27 -0700 Subject: [PATCH 0354/1357] Getting DNNModel to work with the new feature columns. 
PiperOrigin-RevId: 213561495 --- .../estimator/dnn_with_layer_annotations.py | 15 +- tensorflow/python/estimator/BUILD | 2 + tensorflow/python/estimator/canned/dnn.py | 181 ++++++++++---- .../estimator/canned/dnn_linear_combined.py | 7 +- .../python/estimator/canned/dnn_test.py | 146 +++++++++-- .../estimator/canned/dnn_testing_utils.py | 227 ++++++++++++++---- .../python/feature_column/feature_column.py | 12 +- .../feature_column/feature_column_v2.py | 14 ++ 8 files changed, 477 insertions(+), 127 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py index 152431d1b2..a8eeff6f6d 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py @@ -76,6 +76,7 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode): weight_collections=None, trainable=True, cols_to_vars=None, + scope=None, cols_to_output_tensors=None): """Returns a dense `Tensor` as input layer based on given `feature_columns`. @@ -112,6 +113,7 @@ def make_input_layer_with_layer_annotations(original_input_layer, mode): 'some_variable:0' shape=(5, 10),